Skip to content

GH 9016: Bitwise operation weirdness #9338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,35 @@ Backwards incompatible API changes
The prior style can be achieved with matplotlib's `axhline` or `axvline`
methods (:issue:`9088`).


- ``Series`` now supports bitwise operations for integral types (:issue:`9016`)

Previously, even if the input dtypes were integral, the output dtype was coerced to ``bool``.

.. code-block:: python

In [2]: pd.Series([0,1,2,3], list('abcd')) | pd.Series([4,4,4,4], list('abcd'))
Out[2]:
a True
b True
c True
d True
dtype: bool

Now if the input dtypes are integral, the output dtype is also integral and the output
values are the result of the bitwise operation.

.. code-block:: python

In [2]: pd.Series([0,1,2,3], list('abcd')) | pd.Series([4,4,4,4], list('abcd'))
Out[2]:
a 4
b 5
c 6
d 7
dtype: int64



Deprecations
~~~~~~~~~~~~

Expand Down
25 changes: 18 additions & 7 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,20 +654,31 @@ def na_op(x, y):
return result

def wrapper(self, other):
is_self_int_dtype = com.is_integer_dtype(self.dtype)

fill_int = lambda x: x.fillna(0)
fill_bool = lambda x: x.fillna(False).astype(bool)

if isinstance(other, pd.Series):
name = _maybe_match_name(self, other)
other = other.reindex_like(self)
is_other_int_dtype = com.is_integer_dtype(other.dtype)
other = fill_int(other) if is_other_int_dtype else fill_bool(other)

other = other.reindex_like(self).fillna(False).astype(bool)
return self._constructor(na_op(self.values, other.values),
filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool
return filler(self._constructor(na_op(self.values, other.values),
index=self.index,
name=name).fillna(False).astype(bool)
name=name))

elif isinstance(other, pd.DataFrame):
return NotImplemented

else:
# scalars
res = self._constructor(na_op(self.values, other),
index=self.index).fillna(False)
return res.astype(bool).__finalize__(self)
# scalars, list, tuple, np.array
filler = fill_int if is_self_int_dtype and com.is_integer_dtype(np.asarray(other)) else fill_bool
return filler(self._constructor(na_op(self.values, other),
index=self.index)).__finalize__(self)

return wrapper


Expand Down
90 changes: 90 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3790,6 +3790,96 @@ def test_comparison_label_based(self):
for v in [np.nan]:
self.assertRaises(TypeError, lambda : t & v)

def test_operators_bitwise(self):
# GH 9016: support bitwise op for integer types
# Checks the values and dtypes produced by &, | and ^ when a Series is
# combined with another Series, a scalar or a list, for bool and
# integer inputs, and that unsupported operands raise TypeError.
index = list('bca')

# Boolean fixtures labelled 'b', 'c', 'a'; integer fixtures use the
# default index.
s_tft = Series([True, False, True], index=index)
s_fff = Series([False, False, False], index=index)
s_tff = Series([True, False, False], index=index)
s_empty = Series([])
# NOTE(review): s_0101 is not referenced again in this test.
s_0101 = Series([0,1,0,1])
s_0123 = Series(range(4))
s_3333 = Series([3] * 4)
s_4444 = Series([4] * 4)

# bool Series combined with an empty Series: & gives all False,
# | leaves the left operand unchanged.
res = s_tft & s_empty
expected = s_fff
assert_series_equal(res, expected)

res = s_tft | s_empty
expected = s_tft
assert_series_equal(res, expected)

# int Series combined with int Series: the result holds the
# element-wise bitwise values (n & 3 == n for n in 0..3, n | 4 == n + 4).
res = s_0123 & s_3333
expected = Series(range(4))
assert_series_equal(res, expected)

res = s_0123 | s_4444
expected = Series(range(4, 8))
assert_series_equal(res, expected)

# Only label 'b' is present here (value 1); labels 'a' and 'c' are
# missing from this operand. The expected results below treat the
# missing labels like False.
s_a0b1c0 = Series([1], list('b'))

res = s_tft & s_a0b1c0
expected = s_tff
assert_series_equal(res, expected)

res = s_tft | s_a0b1c0
expected = s_tft
assert_series_equal(res, expected)

# Integer scalars: a bool Series is expected to stay bool, an int
# Series is expected to give integer results.
n0 = 0
res = s_tft & n0
expected = s_fff
assert_series_equal(res, expected)

res = s_0123 & n0
expected = Series([0] * 4)
assert_series_equal(res, expected)

n1 = 1
res = s_tft & n1
expected = s_tft
assert_series_equal(res, expected)

res = s_0123 & n1
expected = Series([0, 1, 0, 1])
assert_series_equal(res, expected)

# Mixed integer widths: the expected dtype is the wider of the two
# (int64 with int8 -> int64, int16 with int32 -> int32).
s_1111 = Series([1]*4, dtype='int8')
res = s_0123 & s_1111
expected = Series([0, 1, 0, 1], dtype='int64')
assert_series_equal(res, expected)

res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
expected = Series([1, 1, 3, 3], dtype='int32')
assert_series_equal(res, expected)

# Strings, NaN and floats (as scalars or inside a list) must raise
# TypeError when combined with an int Series.
self.assertRaises(TypeError, lambda: s_1111 & 'a')
self.assertRaises(TypeError, lambda: s_1111 & ['a','b','c','d'])
self.assertRaises(TypeError, lambda: s_0123 & np.NaN)
self.assertRaises(TypeError, lambda: s_0123 & 3.14)
self.assertRaises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])

# s_0123 will be all false now because of reindexing like s_tft
assert_series_equal(s_tft & s_0123, Series([False] * 3, list('bca')))
# s_tft will be all false now because of reindexing like s_0123
assert_series_equal(s_0123 & s_tft, Series([False] * 4))
# int Series combined with bool operands: the expected result is bool.
assert_series_equal(s_0123 & False, Series([False] * 4))
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
assert_series_equal(s_0123 & [False], Series([False] * 4))
# NOTE(review): `(False)` is the plain scalar False (no trailing comma,
# so not a tuple); this repeats the scalar `& False` assertion.
assert_series_equal(s_0123 & (False), Series([False] * 4))
# A NaN inside the other Series does not raise; the result is all False.
assert_series_equal(s_0123 & Series([False, np.NaN, False, False]), Series([False] * 4))

# Float and object-dtype Series operands do not raise (unlike the float
# and string scalars/lists above); the expected result is a bool Series.
s_ftft = Series([False, True, False, True])
assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

s_abNd = Series(['a','b',np.NaN,'d'])
res = s_0123 & s_abNd
expected = s_ftft
assert_series_equal(res, expected)


def test_between(self):
s = Series(bdate_range('1/1/2000', periods=20).asobject)
s[::2] = np.nan
Expand Down