Skip to content

Commit 65815e6

Browse files
Unprocessable authored and jreback committed
BUG: Fix numpy boolean subtraction error in Series.diff (#28251)
1 parent 7721f31 commit 65815e6

File tree

4 files changed

+68
-43
lines changed

4 files changed

+68
-43
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -307,6 +307,7 @@ Other
307307
- Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`)
308308
- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`)
309309
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
310+
- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
310311
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
311312

312313
.. _whatsnew_1000.contributors:

pandas/core/algorithms.py

+4
Original file line number | Diff line number | Diff line change
@@ -1910,6 +1910,7 @@ def diff(arr, n: int, axis: int = 0):
19101910
dtype = arr.dtype
19111911

19121912
is_timedelta = False
1913+
is_bool = False
19131914
if needs_i8_conversion(arr):
19141915
dtype = np.float64
19151916
arr = arr.view("i8")
@@ -1918,6 +1919,7 @@ def diff(arr, n: int, axis: int = 0):
19181919

19191920
elif is_bool_dtype(dtype):
19201921
dtype = np.object_
1922+
is_bool = True
19211923

19221924
elif is_integer_dtype(dtype):
19231925
dtype = np.float64
@@ -1959,6 +1961,8 @@ def diff(arr, n: int, axis: int = 0):
19591961
result = res - lag
19601962
result[mask] = na
19611963
out_arr[res_indexer] = result
1964+
elif is_bool:
1965+
out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer]
19621966
else:
19631967
out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
19641968

pandas/tests/series/test_analytics.py

+63 -1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
from pandas.api.types import is_scalar
2121
from pandas.core.index import MultiIndex
2222
from pandas.core.indexes.datetimes import Timestamp
23+
from pandas.core.indexes.timedeltas import TimedeltaIndex
2324
import pandas.util.testing as tm
2425
from pandas.util.testing import (
2526
assert_almost_equal,
@@ -228,7 +229,7 @@ def test_cummax_timedelta64(self):
228229
result = s.cummax(skipna=False)
229230
tm.assert_series_equal(expected, result)
230231

231-
def test_npdiff(self):
232+
def test_np_diff(self):
232233
pytest.skip("skipping due to Series no longer being an ndarray")
233234

234235
# no longer works as the return type of np.diff is now nd.array
@@ -237,6 +238,67 @@ def test_npdiff(self):
237238
r = np.diff(s)
238239
assert_series_equal(Series([nan, 0, 0, 0, nan]), r)
239240

241+
def test_int_diff(self):
242+
# int dtype
243+
a = 10000000000000000
244+
b = a + 1
245+
s = Series([a, b])
246+
247+
result = s.diff()
248+
assert result[1] == 1
249+
250+
def test_tz_diff(self):
251+
# Combined datetime diff, normal diff and boolean diff test
252+
ts = tm.makeTimeSeries(name="ts")
253+
ts.diff()
254+
255+
# neg n
256+
result = ts.diff(-1)
257+
expected = ts - ts.shift(-1)
258+
assert_series_equal(result, expected)
259+
260+
# 0
261+
result = ts.diff(0)
262+
expected = ts - ts
263+
assert_series_equal(result, expected)
264+
265+
# datetime diff (GH3100)
266+
s = Series(date_range("20130102", periods=5))
267+
result = s.diff()
268+
expected = s - s.shift(1)
269+
assert_series_equal(result, expected)
270+
271+
# timedelta diff
272+
result = result - result.shift(1) # previous result
273+
expected = expected.diff() # previously expected
274+
assert_series_equal(result, expected)
275+
276+
# with tz
277+
s = Series(
278+
date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
279+
)
280+
result = s.diff()
281+
expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
282+
assert_series_equal(result, expected)
283+
284+
@pytest.mark.parametrize(
285+
"input,output,diff",
286+
[([False, True, True, False, False], [nan, True, False, True, False], 1)],
287+
)
288+
def test_bool_diff(self, input, output, diff):
289+
# boolean series (test for fixing #17294)
290+
s = Series(input)
291+
result = s.diff()
292+
expected = Series(output)
293+
assert_series_equal(result, expected)
294+
295+
def test_obj_diff(self):
296+
# object series
297+
s = Series([False, True, 5.0, nan, True, False])
298+
result = s.diff()
299+
expected = s - s.shift(1)
300+
assert_series_equal(result, expected)
301+
240302
def _check_accum_op(self, name, datetime_series_, check_dtype=True):
241303
func = getattr(np, name)
242304
tm.assert_numpy_array_equal(

pandas/tests/series/test_timeseries.py

-42
Original file line number | Diff line number | Diff line change
@@ -355,48 +355,6 @@ def test_asfreq_datetimeindex_empty_series(self):
355355
)
356356
tm.assert_index_equal(expected.index, result.index)
357357

358-
def test_diff(self):
359-
# Just run the function
360-
self.ts.diff()
361-
362-
# int dtype
363-
a = 10000000000000000
364-
b = a + 1
365-
s = Series([a, b])
366-
367-
rs = s.diff()
368-
assert rs[1] == 1
369-
370-
# neg n
371-
rs = self.ts.diff(-1)
372-
xp = self.ts - self.ts.shift(-1)
373-
assert_series_equal(rs, xp)
374-
375-
# 0
376-
rs = self.ts.diff(0)
377-
xp = self.ts - self.ts
378-
assert_series_equal(rs, xp)
379-
380-
# datetime diff (GH3100)
381-
s = Series(date_range("20130102", periods=5))
382-
rs = s - s.shift(1)
383-
xp = s.diff()
384-
assert_series_equal(rs, xp)
385-
386-
# timedelta diff
387-
nrs = rs - rs.shift(1)
388-
nxp = xp.diff()
389-
assert_series_equal(nrs, nxp)
390-
391-
# with tz
392-
s = Series(
393-
date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
394-
)
395-
result = s.diff()
396-
assert_series_equal(
397-
result, Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
398-
)
399-
400358
def test_pct_change(self):
401359
rs = self.ts.pct_change(fill_method=None)
402360
assert_series_equal(rs, self.ts / self.ts.shift(1) - 1)

0 commit comments

Comments
 (0)