From affc8b036b65e7097682933a0c04ac9c92097a48 Mon Sep 17 00:00:00 2001 From: SleepingPills Date: Wed, 10 Jul 2013 19:05:03 +0200 Subject: [PATCH 1/2] BUG: (GH4192) Fixed buglet in the broadcasting logic in Series.where --- doc/source/release.rst | 3 +++ pandas/core/series.py | 2 +- pandas/tests/test_series.py | 23 ++++++++++++++++++----- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index a9e88f1341992..ada899e099aaa 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -324,6 +324,9 @@ pandas 0.12 (:issue:`4145`, :issue:`4146`) - Fixed bug in the parsing of microseconds when using the ``format`` argument in ``to_datetime`` (:issue:`4152`) + - Fixed bug in ``Series.where`` where broadcasting a single element input vector + to the length of the series resulted in multiplying the value + inside the input (:issue:`4192`) pandas 0.11.0 ============= diff --git a/pandas/core/series.py b/pandas/core/series.py index 06abd1d5b4127..ee991ef645ea8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -755,7 +755,7 @@ def where(self, cond, other=nan, inplace=False): # GH 2745 # treat like a scalar if len(other) == 1: - other = np.array(other[0]*len(ser)) + other = np.repeat(other, len(ser)) # GH 3235 # match True cond to other diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index b4ad172ddf340..a1858a350447d 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1090,11 +1090,6 @@ def test_where(self): expected = Series([0,2]) assert_series_equal(s,expected) - s = Series([1,2]) - s[[True, False]] = [0] - expected = Series([0,2]) - assert_series_equal(s,expected) - # failures self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [0,2,3]) self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), []) @@ -1142,6 +1137,24 @@ def test_where(self): s = Series(np.arange(10)) mask = s > 5 self.assertRaises(ValueError, 
s.__setitem__, mask, ([0]*5,)) + + def test_where_broadcast(self): + # Test a variety of differently sized series + for size in range(2, 6): + # Test a variety of boolean indices + for selection in [np.resize([True, False, False, False, False], size), # First element should be set + np.resize([True, False], size), # Set alternating elements + np.resize([False], size)]: # No element should be set + # Test a variety of different numbers as content + for item in [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min]: + # Test numpy arrays, lists and tuples as the input to be broadcast + for arr in [np.array([item]), [item], (item,)]: + data = np.arange(size, dtype=float) + s = Series(data) + s[selection] = arr + # Construct the expected series by taking the source data or item based on the selection + expected = Series([item if use_item else data[i] for i, use_item in enumerate(selection)]) + assert_series_equal(s,expected) def test_where_inplace(self): s = Series(np.random.randn(5)) From fdbba96872468a9522387fe8f81f65dcf83492e9 Mon Sep 17 00:00:00 2001 From: SleepingPills Date: Wed, 10 Jul 2013 19:53:43 +0200 Subject: [PATCH 2/2] BUG: (GH4192) fixed broken unit test --- pandas/core/series.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index ee991ef645ea8..81a68138ce202 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -751,11 +751,12 @@ def where(self, cond, other=nan, inplace=False): if len(other) != len(ser): icond = ~cond - + print "First", other # GH 2745 # treat like a scalar if len(other) == 1: - other = np.repeat(other, len(ser)) + other = np.array(other[0]) + print "EQ 1", other # GH 3235 # match True cond to other @@ -767,7 +768,10 @@ def where(self, cond, other=nan, inplace=False): other = new_other else: + print "Raising", other raise ValueError('Length of replacements must equal series length') + + print "Finished", other change = ser if inplace else None 
com._maybe_upcast_putmask(ser,~cond,other,change=change)