From 0a1872e18b7001b38d9c6fc32a663d7c8e8ae685 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 15 Jan 2023 22:10:21 +0100 Subject: [PATCH 1/3] BUG: Series add casting to object for list and masked series --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/masked.py | 12 ++++++++++++ pandas/tests/series/test_arithmetic.py | 11 +++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 033f47f0c994d..3dcee72770272 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -929,6 +929,7 @@ Numeric - Bug in arithmetic operations on :class:`Series` not propagating mask when combining masked dtypes and numpy dtypes (:issue:`45810`, :issue:`42630`) - Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`) - Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`) +- Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index aaa7c706d95bb..c52ae42848c06 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -621,6 +621,18 @@ def _arith_method(self, other, op): op_name = op.__name__ omask = None + if not hasattr(other, "dtype") and is_list_like(other): + # Try inferring masked dtype instead of casting to object + inferred_dtype = lib.infer_dtype(other, skipna=True) + if inferred_dtype == "integer": + from pandas.core.arrays import IntegerArray + + other = IntegerArray._from_sequence(other) + elif inferred_dtype in ["floating", "mixed-integer-float"]: + from pandas.core.arrays import FloatingArray + + other = FloatingArray._from_sequence(other) + if isinstance(other, 
BaseMaskedArray): other, omask = other._data, other._mask diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 35096540c70d3..7e8cc24d21661 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -325,6 +325,17 @@ def test_mask_div_propagate_na_for_non_na_dtype(self): result = ser2 / ser1 tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("val, dtype", [(3, "Int64"), (3.5, "Float64")]) + def test_add_list_to_masked_array(self, val, dtype): + # GH#22962 + ser = Series([1, None, 3], dtype="Int64") + result = ser + [1, None, val] + expected = Series([2, None, 3 + val], dtype=dtype) + tm.assert_series_equal(result, expected) + + result = [1, None, val] + ser + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Comparisons From 1c7399c3bec58425fcff8c227afb960e23aff226 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 16 Jan 2023 20:00:27 +0100 Subject: [PATCH 2/3] Fix boolean --- pandas/core/arrays/masked.py | 5 +++++ pandas/tests/series/test_arithmetic.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index c52ae42848c06..02ec1687e72ee 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -633,6 +633,11 @@ def _arith_method(self, other, op): other = FloatingArray._from_sequence(other) + elif inferred_dtype in ["boolean"]: + from pandas.core.arrays import BooleanArray + + other = BooleanArray._from_sequence(other) + if isinstance(other, BaseMaskedArray): other, omask = other._data, other._mask diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 7e8cc24d21661..a7f73f2e22fca 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -336,6 +336,16 @@ def test_add_list_to_masked_array(self, val, dtype): result = 
[1, None, val] + ser tm.assert_series_equal(result, expected) + def test_add_list_to_masked_array_boolean(self): + # GH#22962 + ser = Series([True, None, False], dtype="boolean") + result = ser + [True, None, True] + expected = Series([True, None, True], dtype="boolean") + tm.assert_series_equal(result, expected) + + result = [True, None, True] + ser + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Comparisons From 7a28bd66216647083f573d90122cc073e8175ea0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 16 Jan 2023 20:49:01 +0100 Subject: [PATCH 3/3] Fix length mismatch --- pandas/core/arrays/masked.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 02ec1687e72ee..77735add89bf7 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -621,7 +621,11 @@ def _arith_method(self, other, op): op_name = op.__name__ omask = None - if not hasattr(other, "dtype") and is_list_like(other): + if ( + not hasattr(other, "dtype") + and is_list_like(other) + and len(other) == len(self) + ): # Try inferring masked dtype instead of casting to object inferred_dtype = lib.infer_dtype(other, skipna=True) if inferred_dtype == "integer":