diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fb19fd81fe7c7..8045f152f8dd5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -651,6 +651,7 @@ Other API Changes - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) +- Boolean operations ``&, |, ^`` between a ``Series`` and an ``Index`` will no longer raise ``TypeError`` (:issue:`19792`) - ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) - :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 83f17a332f4be..c7039e5e1d9a9 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1241,23 +1241,22 @@ def _bool_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ + name = _get_op_name(op, special) def na_op(x, y): try: result = op(x, y) except TypeError: - if isinstance(y, list): - y = construct_1d_object_array_from_listlike(y) - - if isinstance(y, (np.ndarray, ABCSeries)): - if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): - result = op(x, y) # when would this be hit? - else: - x = _ensure_object(x) - y = _ensure_object(y) - result = libops.vec_binop(x, y, op) + assert not isinstance(y, (list, ABCSeries, pd.Index)) + if isinstance(y, np.ndarray): + # bool-bool dtype operations should be OK, should not get here + assert not (is_bool_dtype(x) and is_bool_dtype(y)) + x = _ensure_object(x) + y = _ensure_object(y) + result = libops.vec_binop(x, y, op) else: # let null fall thru + assert is_scalar(y) if not isna(y): y = bool(y) try: @@ -1278,32 +1277,39 @@ def wrapper(self, other): self, other = _align_method_SERIES(self, other, align_asobject=True) + res_name = get_op_result_name(self, other) + if isinstance(other, ABCDataFrame): # Defer to DataFrame implementation; fail early return NotImplemented - elif isinstance(other, ABCSeries): - name = get_op_result_name(self, other) + elif isinstance(other, (ABCSeries, pd.Index)): is_other_int_dtype = is_integer_dtype(other.dtype) other = fill_int(other) if is_other_int_dtype else fill_bool(other) - - filler = (fill_int if is_self_int_dtype and is_other_int_dtype - else fill_bool) - - res_values = na_op(self.values, other.values) - unfilled = self._constructor(res_values, - index=self.index, name=name) - return filler(unfilled) + ovalues = other.values else: # scalars, list, tuple, np.array - filler = (fill_int if is_self_int_dtype and - is_integer_dtype(np.asarray(other)) else fill_bool) - - res_values = na_op(self.values, other) - unfilled = self._constructor(res_values, index=self.index) - return filler(unfilled).__finalize__(self) + is_other_int_dtype = is_integer_dtype(np.asarray(other)) + if is_list_like(other) and not isinstance(other, np.ndarray): + # TODO: Can we do this before the is_integer_dtype check? + # could the is_integer_dtype check be checking the wrong + # thing? e.g. other = [[0, 1], [2, 3], [4, 5]]? + other = construct_1d_object_array_from_listlike(other) + ovalues = other + + filler = (fill_int if is_self_int_dtype and is_other_int_dtype + else fill_bool) + + res_values = na_op(self.values, ovalues) + unfilled = self._constructor(res_values, index=self.index, + name=res_name) + result = filler(unfilled) + if not isinstance(other, ABCSeries): + result = result.__finalize__(self) + return result + wrapper.__name__ = name return wrapper diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index f90fcce973f00..834ee75443c52 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -178,46 +178,47 @@ def test_comparison_tuples(self): expected = Series([True, False]) assert_series_equal(result, expected) - def test_comparison_operators_with_nas(self): + @pytest.mark.parametrize('op', ['lt', 'le', 'gt', 'ge', 'eq', 'ne']) + def test_comparison_operators_with_nas(self, op): ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) ser[::2] = np.nan # test that comparisons work - ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] - for op in ops: - val = ser[5] + val = ser[5] - f = getattr(operator, op) - result = f(ser, val) + f = getattr(operator, op) + result = f(ser, val) - expected = f(ser.dropna(), val).reindex(ser.index) + expected = f(ser.dropna(), val).reindex(ser.index) - if op == 'ne': - expected = expected.fillna(True).astype(bool) - else: - expected = expected.fillna(False).astype(bool) + if op == 'ne': + expected = expected.fillna(True).astype(bool) + else: + expected = expected.fillna(False).astype(bool) - assert_series_equal(result, expected) + assert_series_equal(result, expected) - # fffffffuuuuuuuuuuuu - # result = f(val, s) - # expected = f(val, s.dropna()).reindex(s.index) - # assert_series_equal(result, expected) + # fffffffuuuuuuuuuuuu + # result = f(val, s) + # expected = f(val, s.dropna()).reindex(s.index) + # assert_series_equal(result, expected) - # boolean &, |, ^ should work with object arrays and propagate NAs + @pytest.mark.parametrize('bool_op', ['and_', 'or_', 'xor']) + def test_bool_operators_with_nas(self, bool_op): + # boolean &, |, ^ should work with object arrays and propagate NAs - ops = ['and_', 'or_', 'xor'] - mask = ser.isna() - for bool_op in ops: - func = getattr(operator, bool_op) + ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) + ser[::2] = np.nan - filled = ser.fillna(ser[0]) + func = getattr(operator, bool_op) + result = func(ser < ser[9], ser > ser[3]) - result = func(ser < ser[9], ser > ser[3]) + mask = ser.isna() + filled = ser.fillna(ser[0]) + expected = func(filled < filled[9], filled > filled[3]) + expected[mask] = False - expected = func(filled < filled[9], filled > filled[3]) - expected[mask] = False - assert_series_equal(result, expected) + assert_series_equal(result, expected) def test_comparison_object_numeric_nas(self): ser = Series(np.random.randn(10), dtype=object) @@ -371,10 +372,10 @@ def test_comparison_different_length(self): b = Series([2, 3, 4]) pytest.raises(ValueError, a.__eq__, b) - def test_comparison_label_based(self): + def test_bool_opslabel_based(self): # GH 4947 - # comparisons should be label based + # boolean ops should be label based a = Series([True, False, True], list('bca')) b = Series([False, True, False], list('abc')) @@ -1398,45 +1399,73 @@ def test_ops_datetimelike_align(self): def test_operators_bitwise(self): # GH 9016: support bitwise op for integer types - index = list('bca') + s_0123 = Series(range(4), dtype='int64') + s_1111 = Series([1] * 4, dtype='int8') - s_tft = Series([True, False, True], index=index) - s_fff = Series([False, False, False], index=index) - s_tff = Series([True, False, False], index=index) - s_empty = Series([]) + # We cannot wrap s111.astype(np.int32) with pd.Index because it + # will case to int64 + res = s_0123.astype(np.int16) | s_1111.astype(np.int32) + expected = Series([1, 1, 3, 3], dtype='int32') + assert_series_equal(res, expected) - # TODO: unused - # s_0101 = Series([0, 1, 0, 1]) + def test_operators_bitwise_invalid(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype='int64') + s_1111 = Series([1] * 4, dtype='int8') + + with pytest.raises(TypeError): + s_1111 & 'a' + with pytest.raises(TypeError): + s_1111 & ['a', 'b', 'c', 'd'] + with pytest.raises(TypeError): + s_0123 & 3.14 + with pytest.raises(TypeError): + s_0123 & [0.1, 4, 3.14, 2] + def test_operators_bitwise_int_series_with_float_series(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype='int64') + s_ftft = Series([False, True, False, True]) + result = s_0123 & Series([0.1, 4, -3.14, 2]) + assert_series_equal(result, s_ftft) + + @pytest.mark.parametrize('box', [np.array, pd.Index, pd.Series]) + def test_operators_bitwise_with_int_arraylike(self, box): + # GH#9016: support bitwise op for integer types + # GH#19795 allow for operating with Index s_0123 = Series(range(4), dtype='int64') s_3333 = Series([3] * 4) s_4444 = Series([4] * 4) - res = s_tft & s_empty - expected = s_fff + res = s_0123 & box(s_3333) + expected = Series(range(4), dtype='int64') assert_series_equal(res, expected) - res = s_tft | s_empty - expected = s_tft + res = s_0123 | box(s_4444) + expected = Series(range(4, 8), dtype='int64') assert_series_equal(res, expected) - res = s_0123 & s_3333 - expected = Series(range(4), dtype='int64') + s_1111 = Series([1] * 4, dtype='int8') + res = s_0123 & box(s_1111) + expected = Series([0, 1, 0, 1], dtype='int64') assert_series_equal(res, expected) - res = s_0123 | s_4444 - expected = Series(range(4, 8), dtype='int64') - assert_series_equal(res, expected) + def test_operators_bitwise_with_bool(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype='int64') - s_a0b1c0 = Series([1], list('b')) + assert_series_equal(s_0123 & False, Series([False] * 4)) + assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) + assert_series_equal(s_0123 & [False], Series([False] * 4)) + assert_series_equal(s_0123 & (False), Series([False] * 4)) - res = s_tft & s_a0b1c0 - expected = s_tff.reindex(list('abc')) - assert_series_equal(res, expected) + def test_operators_bitwise_with_integers(self): + # GH#9016: support bitwise op for integer types + index = list('bca') - res = s_tft | s_a0b1c0 - expected = s_tft.reindex(list('abc')) - assert_series_equal(res, expected) + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + s_0123 = Series(range(4), dtype='int64') n0 = 0 res = s_tft & n0 @@ -1456,20 +1485,34 @@ def test_operators_bitwise(self): expected = Series([0, 1, 0, 1]) assert_series_equal(res, expected) - s_1111 = Series([1] * 4, dtype='int8') - res = s_0123 & s_1111 - expected = Series([0, 1, 0, 1], dtype='int64') + def test_operators_bitwise_with_reindexing(self): + # ops where reindeing needs to be done, so operating with a Series + # makes sense but with an Index or array does not + # GH#9016: support bitwise op for integer types + index = list('bca') + + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + s_tff = Series([True, False, False], index=index) + s_empty = Series([]) + s_a0b1c0 = Series([1], list('b')) + s_0123 = Series(range(4), dtype='int64') + + res = s_tft & s_empty + expected = s_fff assert_series_equal(res, expected) - res = s_0123.astype(np.int16) | s_1111.astype(np.int32) - expected = Series([1, 1, 3, 3], dtype='int32') + res = s_tft | s_empty + expected = s_tft assert_series_equal(res, expected) - pytest.raises(TypeError, lambda: s_1111 & 'a') - pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) - pytest.raises(TypeError, lambda: s_0123 & np.NaN) - pytest.raises(TypeError, lambda: s_0123 & 3.14) - pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) + res = s_tft & s_a0b1c0 + expected = s_tff.reindex(list('abc')) + assert_series_equal(res, expected) + + res = s_tft | s_a0b1c0 + expected = s_tft.reindex(list('abc')) + assert_series_equal(res, expected) # s_0123 will be all false now because of reindexing like s_tft if compat.PY3: @@ -1491,16 +1534,15 @@ def test_operators_bitwise(self): exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c']) assert_series_equal(s_0123 & s_tft, exp) - assert_series_equal(s_0123 & False, Series([False] * 4)) - assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) - assert_series_equal(s_0123 & [False], Series([False] * 4)) - assert_series_equal(s_0123 & (False), Series([False] * 4)) - assert_series_equal(s_0123 & Series([False, np.NaN, False, False]), - Series([False] * 4)) + def test_operators_bitwise_with_nans(self): + # with NaNs present op(series, series) works but op(series, index) + # is not implemented + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype='int64') + result = s_0123 & Series([False, np.NaN, False, False]) + assert_series_equal(result, Series([False] * 4)) s_ftft = Series([False, True, False, True]) - assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft) - s_abNd = Series(['a', 'b', np.NaN, 'd']) res = s_0123 & s_abNd expected = s_ftft @@ -1509,17 +1551,16 @@ def test_operators_bitwise(self): def test_scalar_na_cmp_corners(self): s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - def tester(a, b): - return a & b - - pytest.raises(TypeError, tester, s, datetime(2005, 1, 1)) + with pytest.raises(TypeError): + s & datetime(2005, 1, 1) s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan expected = Series(True, index=s.index) expected[::2] = False - assert_series_equal(tester(s, list(s)), expected) + result = s & list(s) + assert_series_equal(result, expected) d = DataFrame({'A': s}) # TODO: Fix this exception - needs to be fixed! (see GH5035) @@ -1530,7 +1571,8 @@ def tester(a, b): # https://github.com/pandas-dev/pandas/issues/5284 pytest.raises(ValueError, lambda: d.__and__(s, axis='columns')) - pytest.raises(ValueError, tester, s, d) + with pytest.raises(ValueError): + s & d # this is wrong as its not a boolean result # result = d.__and__(s,axis='index')