From ea49e77fea7caa5f5018e6778eccc76e6eb9c576 Mon Sep 17 00:00:00 2001 From: Giacomo Ferroni Date: Sat, 21 Jan 2017 17:31:13 +0000 Subject: [PATCH 1/5] Fix for GH12541 --- pandas/core/window.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index b7276aed506de..5b5b9e56771f9 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -762,17 +762,7 @@ def count(self): results = [] for b in blocks: - - if needs_i8_conversion(b.values): - result = b.notnull().astype(int) - else: - try: - result = np.isfinite(b).astype(float) - except TypeError: - result = np.isfinite(b.astype(float)).astype(float) - - result[pd.isnull(result)] = 0 - + result = b.notnull().astype(int) result = self._constructor(result, window=window, min_periods=0, center=self.center).sum() results.append(result) From 26c86a53ca0326fcc4d87f61d7a7ea3aee6ac7cc Mon Sep 17 00:00:00 2001 From: Giacomo Ferroni Date: Sat, 21 Jan 2017 17:33:07 +0000 Subject: [PATCH 2/5] Test added for GH12541 --- pandas/tests/test_window.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 929ff43bfaaad..0feecd2c59c57 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -207,6 +207,20 @@ def f(): 'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')]) tm.assert_frame_equal(result, expected, check_like=True) + def test_count_nonnumeric_types(self): + # GH12541 + df_inf = DataFrame({'x': [1, 2, 3], 'y': [1., 2., np.Inf]}) + df_date = DataFrame({'x': [1, 2, 3], 'y': pd.date_range('20130101',periods=3)}) + df_inf_date = DataFrame({'x': [1, 2, 3], 'y': [1., 2., np.Inf], + 'z': pd.date_range('20170101',periods=3)}) + + expected_1 = DataFrame([[1,1],[2,2],[2,2]], columns=['x','y'], dtype=float) + expected_2 = DataFrame([[1,1,1],[2,2,2],[2,2,2]], columns=['x','y','z'], dtype=float) + + self.assert_frame_equal(df_inf.rolling(window=2).count(), expected_1) + self.assert_frame_equal(df_date.rolling(window=2).count(), expected_1) + self.assert_frame_equal(df_inf_date.rolling(window=2).count(), expected_2) + def test_window_with_args(self): tm._skip_if_no_scipy() From cb8493558031c8327b1b7caf68235f61d90887d3 Mon Sep 17 00:00:00 2001 From: Giacomo Ferroni Date: Sat, 21 Jan 2017 18:42:38 +0000 Subject: [PATCH 3/5] pylint checks --- pandas/tests/test_window.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 0feecd2c59c57..eaf493314d6c3 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -210,16 +210,22 @@ def f(): def test_count_nonnumeric_types(self): # GH12541 df_inf = DataFrame({'x': [1, 2, 3], 'y': [1., 2., np.Inf]}) - df_date = DataFrame({'x': [1, 2, 3], 'y': pd.date_range('20130101',periods=3)}) + df_date = DataFrame({'x': [1, 2, 3], + 'y': pd.date_range('20130101',periods=3)}) df_inf_date = DataFrame({'x': [1, 2, 3], 'y': [1., 2., np.Inf], 'z': pd.date_range('20170101',periods=3)}) - expected_1 = DataFrame([[1,1],[2,2],[2,2]], columns=['x','y'], dtype=float) - expected_2 = DataFrame([[1,1,1],[2,2,2],[2,2,2]], columns=['x','y','z'], dtype=float) - - self.assert_frame_equal(df_inf.rolling(window=2).count(), expected_1) - self.assert_frame_equal(df_date.rolling(window=2).count(), expected_1) - self.assert_frame_equal(df_inf_date.rolling(window=2).count(), expected_2) + expected_1 = DataFrame([[1,1],[2,2],[2,2]], + columns=['x','y'], dtype=float) + expected_2 = DataFrame([[1,1,1],[2,2,2],[2,2,2]], + columns=['x','y','z'], dtype=float) + + self.assert_frame_equal(df_inf.rolling(window=2).count(), + expected_1) + self.assert_frame_equal(df_date.rolling(window=2).count(), + expected_1) + self.assert_frame_equal(df_inf_date.rolling(window=2).count(), + expected_2) def test_window_with_args(self): tm._skip_if_no_scipy() From 96213156ead2a8cd5c4da69b4bb48667f9739a65 Mon Sep 17 00:00:00 2001 From: Giacomo Ferroni Date: Tue, 24 Jan 2017 10:44:11 +0000 Subject: [PATCH 4/5] Added extra test and updated whatsnew --- doc/source/whatsnew/v0.20.0.txt | 4 +++ pandas/tests/test_window.py | 46 +++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c82dc370e3e71..cddb489f44f50 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -362,3 +362,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) + + + +- Bug in window function ``count`` not counting ``np.Inf`` (:issue:`12541`) \ No newline at end of file diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index eaf493314d6c3..cf9d4a03d0f24 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -209,23 +209,35 @@ def f(): def test_count_nonnumeric_types(self): # GH12541 - df_inf = DataFrame({'x': [1, 2, 3], 'y': [1., 2., np.Inf]}) - df_date = DataFrame({'x': [1, 2, 3], - 'y': pd.date_range('20130101',periods=3)}) - df_inf_date = DataFrame({'x': [1, 2, 3], 'y': [1., 2., np.Inf], - 'z': pd.date_range('20170101',periods=3)}) - - expected_1 = DataFrame([[1,1],[2,2],[2,2]], - columns=['x','y'], dtype=float) - expected_2 = DataFrame([[1,1,1],[2,2,2],[2,2,2]], - columns=['x','y','z'], dtype=float) - - self.assert_frame_equal(df_inf.rolling(window=2).count(), - expected_1) - self.assert_frame_equal(df_date.rolling(window=2).count(), - expected_1) - self.assert_frame_equal(df_inf_date.rolling(window=2).count(), - expected_2) + cols = ['int', 'float', 'string', 'datetime', 'timedelta', + 'fl_inf', 'fl_nan', 'str_nan', 'dt_nat'] + + df = DataFrame( + {'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'datetime': pd.date_range('20170101', periods=3), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s'), + 'fl_inf': [1., 2., np.Inf], + 'fl_nan': [1., 2., np.NaN], + 'str_nan': ['aa', 'bb', np.NaN], + 'dt_nat': [pd.Timestamp('20170101'), pd.Timestamp('20170203'), + pd.Timestamp(None)]}, + columns=cols) + + expected = DataFrame( + {'int': [1., 2., 2.], + 'float': [1., 2., 2.], + 'string': [1., 2., 2.], + 'datetime': [1., 2., 2.], + 'timedelta': [1., 2., 2.], + 'fl_inf': [1., 2., 2.], + 'fl_nan': [1., 2., 1.], + 'str_nan': [1., 2., 1.], + 'dt_nat': [1., 2., 1.]}, + columns=cols) + + self.assert_frame_equal(df.rolling(window=2).count(), expected) def test_window_with_args(self): tm._skip_if_no_scipy() From 65d70eb614d8b799f5c155768448763ceaa64e26 Mon Sep 17 00:00:00 2001 From: Giacomo Ferroni Date: Tue, 24 Jan 2017 14:47:02 +0000 Subject: [PATCH 5/5] Added Periods to the test --- pandas/tests/test_window.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index bfad4a4082d63..3f53b5eaf3753 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -209,8 +209,8 @@ def f(): def test_count_nonnumeric_types(self): # GH12541 - cols = ['int', 'float', 'string', 'datetime', 'timedelta', - 'fl_inf', 'fl_nan', 'str_nan', 'dt_nat'] + cols = ['int', 'float', 'string', 'datetime', 'timedelta', 'periods', + 'fl_inf', 'fl_nan', 'str_nan', 'dt_nat', 'periods_nat'] df = DataFrame( {'int': [1, 2, 3], @@ -218,11 +218,15 @@ def test_count_nonnumeric_types(self): 'string': list('abc'), 'datetime': pd.date_range('20170101', periods=3), 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s'), + 'periods': [pd.Period('2012-01'), pd.Period('2012-02'), + pd.Period('2012-03')], 'fl_inf': [1., 2., np.Inf], 'fl_nan': [1., 2., np.NaN], 'str_nan': ['aa', 'bb', np.NaN], 'dt_nat': [pd.Timestamp('20170101'), pd.Timestamp('20170203'), - pd.Timestamp(None)]}, + pd.Timestamp(None)], + 'periods_nat': [pd.Period('2012-01'), pd.Period('2012-02'), + pd.Period(None)]}, columns=cols) expected = DataFrame( @@ -231,10 +235,12 @@ def test_count_nonnumeric_types(self): 'string': [1., 2., 2.], 'datetime': [1., 2., 2.], 'timedelta': [1., 2., 2.], + 'periods': [1., 2., 2.], 'fl_inf': [1., 2., 2.], 'fl_nan': [1., 2., 1.], 'str_nan': [1., 2., 1.], - 'dt_nat': [1., 2., 1.]}, + 'dt_nat': [1., 2., 1.], + 'periods_nat': [1., 2., 1.]}, columns=cols) self.assert_frame_equal(df.rolling(window=2).count(), expected)