From 1a3b757650e49ab1d3495dc70d3604ea9e47abf7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 24 Jan 2021 16:38:28 -0800 Subject: [PATCH 1/4] CI: fix PandasArray test --- pandas/tests/extension/test_numpy.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 4e054a07d8ef1..753ad7bb3728d 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -313,6 +313,10 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): def test_arith_series_with_array(self, data, all_arithmetic_operators): super().test_arith_series_with_array(data, all_arithmetic_operators) + @skip_nested + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + super().test_arith_frame_with_scalar(data, all_arithmetic_operators) + class TestPrinting(BaseNumPyTests, base.BasePrintingTests): pass From 7bd6d83103e852feb889f9a4d596ca17ff5fafdc Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 25 Jan 2021 19:21:43 -0800 Subject: [PATCH 2/4] ENH: recognize Decimal(nan) in pd.isna --- pandas/_libs/missing.pyx | 16 ++++- pandas/_testing/__init__.py | 3 +- pandas/core/dtypes/missing.py | 11 +++- pandas/tests/dtypes/cast/test_promote.py | 8 ++- pandas/tests/dtypes/test_missing.py | 58 ++++++++++++++----- pandas/tests/extension/base/interface.py | 3 +- .../tests/extension/decimal/test_decimal.py | 13 ----- pandas/tests/indexes/test_index_new.py | 6 ++ pandas/tests/indexes/test_numeric.py | 10 +++- pandas/tests/io/json/test_pandas.py | 7 ++- pandas/tests/tools/test_to_datetime.py | 11 +++- pandas/tests/util/test_assert_series_equal.py | 10 ++++ 12 files changed, 118 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index abf38265ddc6d..e83c11ee1e90e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,3 +1,4 @@ +from decimal import Decimal import numbers import cython @@ -28,6 +29,8 @@ cdef: bint is_32bit = not IS64 + type cDecimal = Decimal # for faster isinstance checks + cpdef bint checknull(object val): """ @@ -53,7 +56,18 @@ cpdef bint checknull(object val): The difference between `checknull` and `checknull_old` is that `checknull` does *not* consider INF or NEGINF to be NA. """ - return val is C_NA or is_null_datetimelike(val, inat_is_null=False) + return ( + val is C_NA + or is_null_datetimelike(val, inat_is_null=False) + or is_decimal_na(val) + ) + + +cdef inline bint is_decimal_na(object val): + """ + Is this a decimal.Decimal object Decimal("NAN"). + """ + return isinstance(val, cDecimal) and val != val cpdef bint checknull_old(object val): diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0b2be53131af6..c8b8e12697d3c 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -2,6 +2,7 @@ import collections from datetime import datetime +from decimal import Decimal from functools import wraps import operator import os @@ -138,7 +139,7 @@ + BYTES_DTYPES ) -NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA] +NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")] EMPTY_STRING_PATTERN = re.compile("^$") diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index f0455c01fa085..56a03cb80fa43 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -1,6 +1,7 @@ """ missing types & inference """ +from decimal import Decimal from functools import partial import numpy as np @@ -606,15 +607,19 @@ def is_valid_nat_for_dtype(obj, dtype: DtypeObj) -> bool: if not lib.is_scalar(obj) or not isna(obj): return False if dtype.kind == "M": - return not isinstance(obj, np.timedelta64) + return not isinstance(obj, (np.timedelta64, Decimal)) if dtype.kind == "m": - return not isinstance(obj, np.datetime64) + return not isinstance(obj, (np.datetime64, Decimal)) if dtype.kind in ["i", "u", "f", "c"]: # Numeric return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64)) + if dtype == np.dtype(object): + # This is needed for Categorical, but is kind of weird + return True + # must be PeriodDType - return not isinstance(obj, (np.datetime64, np.timedelta64)) + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) def isna_all(arr: ArrayLike) -> bool: diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 89b45890458c5..a3f57ddae35db 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -3,6 +3,7 @@ """ import datetime +from decimal import Decimal import numpy as np import pytest @@ -575,7 +576,12 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, nulls_fi fill_value = nulls_fixture dtype = np.dtype(any_numpy_dtype_reduced) - if is_integer_dtype(dtype) and fill_value is not NaT: + if isinstance(fill_value, Decimal): + # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture) + # this is the existing bheavior in maybe_promote + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_integer_dtype(dtype) and fill_value is not NaT: # integer + other missing value (np.nan / None) casts to float expected_dtype = np.float64 exp_val_for_scalar = np.nan diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index c02185dd82043..57e89d36b8eb7 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -300,6 +300,43 @@ def test_period(self): tm.assert_series_equal(isna(s), exp) tm.assert_series_equal(notna(s), ~exp) + def test_decimal(self): + # scalars GH#23530 + a = Decimal(1.0) + assert pd.isna(a) is False + assert pd.notna(a) is True + + b = Decimal("NaN") + assert pd.isna(b) is True + assert pd.notna(b) is False + + # array + arr = np.array([a, b]) + expected = np.array([False, True]) + result = pd.isna(arr) + tm.assert_numpy_array_equal(result, expected) + + result = pd.notna(arr) + tm.assert_numpy_array_equal(result, ~expected) + + # series + ser = Series(arr) + expected = Series(expected) + result = pd.isna(ser) + tm.assert_series_equal(result, expected) + + result = pd.notna(ser) + tm.assert_series_equal(result, ~expected) + + # index + idx = pd.Index(arr) + expected = np.array([False, True]) + result = pd.isna(idx) + tm.assert_numpy_array_equal(result, expected) + + result = pd.notna(idx) + tm.assert_numpy_array_equal(result, ~expected) + @pytest.mark.parametrize("dtype_equal", [True, False]) def test_array_equivalent(dtype_equal): @@ -598,24 +635,22 @@ def test_empty_like(self): class TestLibMissing: - def test_checknull(self): - for value in na_vals: - assert libmissing.checknull(value) + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + def test_checknull(self, func): + for value in na_vals + sometimes_na_vals: + assert func(value) for value in inf_vals: - assert not libmissing.checknull(value) + assert not func(value) for value in int_na_vals: - assert not libmissing.checknull(value) - - for value in sometimes_na_vals: - assert not libmissing.checknull(value) + assert not func(value) for value in never_na_vals: - assert not libmissing.checknull(value) + assert not func(value) def test_checknull_old(self): - for value in na_vals: + for value in na_vals + sometimes_na_vals: assert libmissing.checknull_old(value) for value in inf_vals: @@ -624,9 +659,6 @@ def test_checknull_old(self): for value in int_na_vals: assert not libmissing.checknull_old(value) - for value in sometimes_na_vals: - assert not libmissing.checknull_old(value) - for value in never_na_vals: assert not libmissing.checknull_old(value) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 6a4ff68b4580f..57ccaf4df8cf6 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -49,7 +49,8 @@ def test_contains(self, data, data_missing): # the data can never contain other nan-likes than na_value for na_value_obj in tm.NULL_OBJECTS: - if na_value_obj is na_value: + if na_value_obj is na_value or type(na_value_obj) == type(na_value): + # type check for e.g. two instances of Decimal("NAN") continue assert na_value_obj not in data assert na_value_obj not in data_missing diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 63980b628b8d2..012823f2097c7 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -166,13 +166,6 @@ class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): class TestMethods(BaseDecimal, base.BaseMethodsTests): @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): - if any(x != x for x in all_data): - mark = pytest.mark.xfail( - reason="tm.assert_series_equal incorrectly raises", - raises=AssertionError, - ) - request.node.add_marker(mark) - all_data = all_data[:10] if dropna: other = np.array(all_data[~all_data.isna()]) @@ -200,12 +193,6 @@ class TestCasting(BaseDecimal, base.BaseCastingTests): class TestGroupby(BaseDecimal, base.BaseGroupbyTests): - def test_groupby_apply_identity(self, data_for_grouping, request): - if any(x != x for x in data_for_grouping): - mark = pytest.mark.xfail(reason="tm.assert_series_equal raises incorrectly") - request.node.add_marker(mark) - super().test_groupby_apply_identity(data_for_grouping) - def test_groupby_agg_extension(self, data_for_grouping): super().test_groupby_agg_extension(data_for_grouping) diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index de0850d37034d..4fba4b13835b3 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -1,6 +1,8 @@ """ Tests for the Index constructor conducting inference. """ +from decimal import Decimal + import numpy as np import pytest @@ -89,6 +91,10 @@ def test_constructor_infer_periodindex(self): def test_constructor_infer_nat_dt_like( self, pos, klass, dtype, ctor, nulls_fixture, request ): + if isinstance(nulls_fixture, Decimal): + # We dont cast these to datetime64/timedelta64 + return + expected = klass([NaT, NaT]) assert expected.dtype == dtype data = [ctor] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e391b76ddbd15..215a6062b185b 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,4 +1,5 @@ from datetime import datetime +from decimal import Decimal import numpy as np import pytest @@ -97,12 +98,17 @@ def test_numeric_compat(self): def test_insert_na(self, nulls_fixture): # GH 18295 (test missing) index = self.create_index() + na_val = nulls_fixture - if nulls_fixture is pd.NaT: + if na_val is pd.NaT: expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) + elif isinstance(na_val, Decimal) and not isinstance(index, Float64Index): + # TODO: decide if this is the desired behavior + expected = Index([index[0], na_val] + list(index[1:]), dtype=object) else: expected = Float64Index([index[0], np.nan] + list(index[1:])) - result = index.insert(1, nulls_fixture) + + result = index.insert(1, na_val) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c3ada52eba5aa..859e5f4734596 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,5 +1,6 @@ import datetime from datetime import timedelta +from decimal import Decimal from io import StringIO import json import os @@ -1731,8 +1732,12 @@ def test_json_pandas_na(self): result = DataFrame([[pd.NA]]).to_json() assert result == '{"0":{"0":null}}' - def test_json_pandas_nulls(self, nulls_fixture): + def test_json_pandas_nulls(self, nulls_fixture, request): # GH 31615 + if isinstance(nulls_fixture, Decimal): + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + result = DataFrame([[nulls_fixture]]).to_json() assert result == '{"0":{"0":null}}' diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 278a315a479bd..1d68a79487f6d 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3,6 +3,7 @@ import calendar from collections import deque from datetime import datetime, timedelta +from decimal import Decimal import locale from dateutil.parser import parse @@ -2440,9 +2441,15 @@ def test_nullable_integer_to_datetime(): @pytest.mark.parametrize("klass", [np.array, list]) def test_na_to_datetime(nulls_fixture, klass): - result = pd.to_datetime(klass([nulls_fixture])) - assert result[0] is pd.NaT + if isinstance(nulls_fixture, Decimal): + with pytest.raises(TypeError, match="not convertible to datetime"): + pd.to_datetime(klass([nulls_fixture])) + + else: + result = pd.to_datetime(klass([nulls_fixture])) + + assert result[0] is pd.NaT def test_empty_string_datetime_coerce__format(): diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index df1853ffd26ae..cf45c8517a741 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -332,3 +332,13 @@ def test_allows_duplicate_labels(): with pytest.raises(AssertionError, match=" Date: Wed, 27 Jan 2021 07:48:48 -0800 Subject: [PATCH 3/4] if->elif --- pandas/core/dtypes/missing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 56a03cb80fa43..28906ce89e5a8 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -606,15 +606,15 @@ def is_valid_nat_for_dtype(obj, dtype: DtypeObj) -> bool: """ if not lib.is_scalar(obj) or not isna(obj): return False - if dtype.kind == "M": + elif dtype.kind == "M": return not isinstance(obj, (np.timedelta64, Decimal)) - if dtype.kind == "m": + elif dtype.kind == "m": return not isinstance(obj, (np.datetime64, Decimal)) - if dtype.kind in ["i", "u", "f", "c"]: + elif dtype.kind in ["i", "u", "f", "c"]: # Numeric return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64)) - if dtype == np.dtype(object): + elif dtype == np.dtype(object): # This is needed for Categorical, but is kind of weird return True From d0e2a87c206b6a3301ca0799bb453830ce235525 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 16:13:45 -0800 Subject: [PATCH 4/4] update for is_matching_na --- pandas/_libs/missing.pyx | 2 ++ pandas/_testing/asserters.py | 19 +++---------------- pandas/conftest.py | 2 +- pandas/tests/dtypes/cast/test_promote.py | 14 +++++++++++--- pandas/tests/dtypes/test_missing.py | 3 +++ pandas/tests/indexes/test_numeric.py | 4 ---- pandas/tests/util/test_assert_attr_equal.py | 3 +++ 7 files changed, 23 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index cbb7f8f96166d..d6a3d18f711d0 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -89,6 +89,8 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False and util.is_timedelta64_object(right) and get_timedelta64_value(right) == NPY_NAT ) + elif is_decimal_na(left): + return is_decimal_na(right) return False diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 829472f24852a..cfb2f722e3d8b 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -7,6 +7,7 @@ import numpy as np from pandas._libs.lib import no_default +from pandas._libs.missing import is_matching_na import pandas._libs.testing as _testing from pandas.core.dtypes.common import ( @@ -457,22 +458,8 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): if left_attr is right_attr: return True - elif ( - is_number(left_attr) - and np.isnan(left_attr) - and is_number(right_attr) - and np.isnan(right_attr) - ): - # np.nan - return True - elif ( - isinstance(left_attr, (np.datetime64, np.timedelta64)) - and isinstance(right_attr, (np.datetime64, np.timedelta64)) - and type(left_attr) is type(right_attr) - and np.isnat(left_attr) - and np.isnat(right_attr) - ): - # np.datetime64("nat") or np.timedelta64("nat") + elif is_matching_na(left_attr, right_attr): + # e.g. both np.nan, both NaT, both pd.NA, ... return True try: diff --git a/pandas/conftest.py b/pandas/conftest.py index ce572e42abec6..426cbf6a65aa5 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -304,7 +304,7 @@ def nselect_method(request): # ---------------------------------------------------------------- # Missing values & co. # ---------------------------------------------------------------- -@pytest.fixture(params=tm.NULL_OBJECTS, ids=str) +@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__) def nulls_fixture(request): """ Fixture for each null type in pandas. diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 83b835779f503..4c0d417a975c0 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -541,9 +541,17 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, nulls_fi if isinstance(fill_value, Decimal): # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture) - # this is the existing bheavior in maybe_promote - expected_dtype = np.dtype(object) - exp_val_for_scalar = fill_value + # this is the existing behavior in maybe_promote, + # hinges on is_valid_na_for_dtype + if dtype.kind in ["i", "u", "f", "c"]: + if dtype.kind in ["i", "u"]: + expected_dtype = np.dtype(np.float64) + else: + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value elif is_integer_dtype(dtype) and fill_value is not NaT: # integer + other missing value (np.nan / None) casts to float expected_dtype = np.float64 diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index b4535ab5f2396..ecd56b5b61244 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -714,6 +714,9 @@ def test_is_matching_na(self, nulls_fixture, nulls_fixture2): elif is_float(left) and is_float(right): # np.nan vs float("NaN") we consider as matching assert libmissing.is_matching_na(left, right) + elif type(left) is type(right): + # e.g. both Decimal("NaN") + assert libmissing.is_matching_na(left, right) else: assert not libmissing.is_matching_na(left, right) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 0b4d530f0257f..99dadfba4e7aa 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,5 +1,4 @@ from datetime import datetime -from decimal import Decimal import numpy as np import pytest @@ -109,9 +108,6 @@ def test_insert_na(self, nulls_fixture): if na_val is pd.NaT: expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) - elif isinstance(na_val, Decimal) and not isinstance(index, Float64Index): - # TODO: decide if this is the desired behavior - expected = Index([index[0], na_val] + list(index[1:]), dtype=object) else: expected = Float64Index([index[0], np.nan] + list(index[1:])) diff --git a/pandas/tests/util/test_assert_attr_equal.py b/pandas/tests/util/test_assert_attr_equal.py index 6fad38c2cd44e..115ef58e085cc 100644 --- a/pandas/tests/util/test_assert_attr_equal.py +++ b/pandas/tests/util/test_assert_attr_equal.py @@ -25,6 +25,9 @@ def test_assert_attr_equal_different_nulls(nulls_fixture, nulls_fixture2): elif is_float(nulls_fixture) and is_float(nulls_fixture2): # we consider float("nan") and np.float64("nan") to be equivalent assert tm.assert_attr_equal("na_value", obj, obj2) + elif type(nulls_fixture) is type(nulls_fixture2): + # e.g. Decimal("NaN") + assert tm.assert_attr_equal("na_value", obj, obj2) else: with pytest.raises(AssertionError, match='"na_value" are different'): tm.assert_attr_equal("na_value", obj, obj2)