From 19391f822fea29a8f466d243f6a603db05476658 Mon Sep 17 00:00:00 2001 From: Magellnea Date: Tue, 19 Aug 2014 00:04:19 +0300 Subject: [PATCH 1/6] Adding a warning when dropping NA values for panel.to_frame #7879 --- pandas/core/panel.py | 3 +++ pandas/tests/test_panel.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1e6ed56386f63..9327ca0c07842 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -858,6 +858,9 @@ def to_frame(self, filter_observations=True): mask = com.notnull(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() + if not np.all(selector): + warnings.warn("NaN values found\ + empty values will be dropped", RuntimeWarning) else: # size = N * K selector = slice(None, None) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index fb1f1c1693fdd..59f3e9d6004ab 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -5,7 +5,7 @@ import nose import numpy as np - +import warnings from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex from pandas.core.datetools import bday from pandas.core.panel import Panel @@ -1556,6 +1556,36 @@ def test_to_frame_multi_drop_level(self): expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) assert_frame_equal(result, expected) + def test_to_frame_na_drop_warnings(self): + def create_a_panel_with_na_vals(filter_observations=True): + df1 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 'bar']) + df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 'bar']) + df2.loc['foo', 'B'] = np.nan + dict_with_dropped_vals = {'df1': df1, 'df2': df2} + Panel(dict_with_dropped_vals).\ + to_frame(filter_observations=filter_observations) + + def create_a_panel_without_na_vals(filter_observations=True): + df1 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 
'bar']) + df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 'bar']) + dict_with_dropped_vals = {'df1': df1, 'df2': df2} + Panel(dict_with_dropped_vals).\ + to_frame(filter_observations=filter_observations) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + create_a_panel_with_na_vals() + create_a_panel_with_na_vals(False) + create_a_panel_without_na_vals() + create_a_panel_without_na_vals(False) + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[0].category, RuntimeWarning)) + self.assertEqual(str(w[0].message), + "NaN values found, empty values will be dropped") + def test_to_panel_na_handling(self): df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], From 63d5a3e32f02b9d9d2dc7b9dd9115c5cc243de04 Mon Sep 17 00:00:00 2001 From: Magellnea Date: Tue, 19 Aug 2014 01:11:27 +0300 Subject: [PATCH 2/6] Adding missing import statement for warnings module --- pandas/core/panel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 9327ca0c07842..0a706d9e043c8 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,6 +8,7 @@ from pandas import compat import sys import numpy as np +import warnings from pandas.core.common import (PandasError, _try_sort, _default_index, _infer_dtype_from_scalar, notnull) from pandas.core.categorical import Categorical From f0f75fcc01b7a6b49b7f369282a6fac3eac645e1 Mon Sep 17 00:00:00 2001 From: Magellnea Date: Wed, 20 Aug 2014 17:08:15 +0300 Subject: [PATCH 3/6] modify test cases to use tm.assert_produces_warning --- pandas/core/panel.py | 3 +-- pandas/tests/test_panel.py | 49 ++++++++++++++++---------------------- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 0a706d9e043c8..cd9adab5bebed 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -860,8 +860,7 @@ def to_frame(self, 
filter_observations=True): # size = mask.sum() selector = mask.ravel() if not np.all(selector): - warnings.warn("NaN values found\ - empty values will be dropped", RuntimeWarning) + warnings.warn("NaN values found, empty values will be dropped", RuntimeWarning) else: # size = N * K selector = slice(None, None) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 59f3e9d6004ab..5daf848cc3a14 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,11 +1,11 @@ # pylint: disable=W0612,E1101 from datetime import datetime +import warnings import operator import nose import numpy as np -import warnings from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex from pandas.core.datetools import bday from pandas.core.panel import Panel @@ -1557,34 +1557,25 @@ def test_to_frame_multi_drop_level(self): assert_frame_equal(result, expected) def test_to_frame_na_drop_warnings(self): - def create_a_panel_with_na_vals(filter_observations=True): - df1 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], - index=['foo', 'bar']) - df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], - index=['foo', 'bar']) - df2.loc['foo', 'B'] = np.nan - dict_with_dropped_vals = {'df1': df1, 'df2': df2} - Panel(dict_with_dropped_vals).\ - to_frame(filter_observations=filter_observations) - - def create_a_panel_without_na_vals(filter_observations=True): - df1 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], - index=['foo', 'bar']) - df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], - index=['foo', 'bar']) - dict_with_dropped_vals = {'df1': df1, 'df2': df2} - Panel(dict_with_dropped_vals).\ - to_frame(filter_observations=filter_observations) - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - create_a_panel_with_na_vals() - create_a_panel_with_na_vals(False) - create_a_panel_without_na_vals() - create_a_panel_without_na_vals(False) - self.assertEqual(len(w), 1) 
- self.assertTrue(issubclass(w[0].category, RuntimeWarning)) - self.assertEqual(str(w[0].message), - "NaN values found, empty values will be dropped") + df1 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 'bar']) + df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 'bar']) + df2.loc['foo', 'B'] = np.nan + dict_without_dropped_vals = {'df1': df1, 'df2': df2} + ## A panel without dropped vals shouldn't throw warnings + with tm.assert_produces_warning(False): + Panel(dict_without_dropped_vals).to_frame() + ## A panel with dropped vals should throw a Runtime warning if \ + # filter_observations is True + df2_with_na_vals = DataFrame(df2) + df2_with_na_vals.loc['foo', 'B'] = np.nan + dict_with_dropped_vals = {'df1': df1, 'df2_dropped': df2_with_na_vals} + with tm.assert_produces_warning(RuntimeWarning): + Panel(dict_with_dropped_vals).to_frame() + ##if filter_observations is False, a warning shouldn't be throws + with tm.assert_produces_warning(False): + Panel(dict_with_dropped_vals).to_frame(filter_observations=False) def test_to_panel_na_handling(self): df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), From 3bd040e12f1b4aada560016b44d631fec33deac2 Mon Sep 17 00:00:00 2001 From: Magellnea Date: Wed, 20 Aug 2014 20:15:25 +0300 Subject: [PATCH 4/6] Minor fix - allow warning repetition from same source --- pandas/core/panel.py | 3 +++ pandas/tests/test_panel.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index cd9adab5bebed..6aaa30bcf2b69 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -39,6 +39,9 @@ "of\n %s" % _shared_doc_kwargs['axes_single_arg']) +# added to allow repetition of warnings +warnings.simplefilter('always', RuntimeWarning) + def _ensure_like_indices(time, panels): """ diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 5daf848cc3a14..5f608d6620ac5 100644 --- 
a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1561,14 +1561,14 @@ def test_to_frame_na_drop_warnings(self): index=['foo', 'bar']) df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], index=['foo', 'bar']) - df2.loc['foo', 'B'] = np.nan dict_without_dropped_vals = {'df1': df1, 'df2': df2} ## A panel without dropped vals shouldn't throw warnings with tm.assert_produces_warning(False): Panel(dict_without_dropped_vals).to_frame() ## A panel with dropped vals should throw a Runtime warning if \ # filter_observations is True - df2_with_na_vals = DataFrame(df2) + df2_with_na_vals = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'], + index=['foo', 'bar']) df2_with_na_vals.loc['foo', 'B'] = np.nan dict_with_dropped_vals = {'df1': df1, 'df2_dropped': df2_with_na_vals} with tm.assert_produces_warning(RuntimeWarning): Panel(dict_with_dropped_vals).to_frame() From 982c8281a2426da904e2d43030c0a9c444af53ba Mon Sep 17 00:00:00 2001 From: Magellnea Date: Fri, 22 Aug 2014 23:16:27 +0300 Subject: [PATCH 5/6] change ```filter_observations``` to be False by default, change other tests accordingly --- pandas/core/panel.py | 2 +- pandas/tests/test_panel.py | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 6aaa30bcf2b69..e27dc0851e4a2 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -839,7 +839,7 @@ def groupby(self, function, axis='major'): axis = self._get_axis_number(axis) return PanelGroupBy(self, function, axis=axis) - def to_frame(self, filter_observations=True): + def to_frame(self, filter_observations=False): """ Transform wide format into long (stacked) format as DataFrame whose columns are the Panel's items and whose index is a MultiIndex formed diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 5f608d6620ac5..3e9f5b513566d 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -440,7 +440,7 @@ def 
test_delitem_and_pop(self): def test_setitem(self): # LongPanel with one item - lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() + lp = self.panel.filter(['ItemA', 'ItemB']).to_frame(filter_observations=True) with tm.assertRaises(ValueError): self.panel['ItemE'] = lp @@ -1436,12 +1436,12 @@ def test_transpose_copy(self): def test_to_frame(self): # filtered - filtered = self.panel.to_frame() - expected = self.panel.to_frame().dropna(how='any') + filtered = self.panel.to_frame(filter_observations=True) + expected = self.panel.to_frame(filter_observations=True).dropna(how='any') assert_frame_equal(filtered, expected) # unfiltered - unfiltered = self.panel.to_frame(filter_observations=False) + unfiltered = self.panel.to_frame() assert_panel_equal(unfiltered.to_panel(), self.panel) # names @@ -1492,11 +1492,11 @@ def test_to_frame_multi_major(self): expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, 'c', 1, 4, 'd', 1], 'i2': [1, 'a', 1, 2, 'b', 1, 3, 'c', 1, 4, 'd', 1]}, index=expected_idx) - result = wp.to_frame() + result = wp.to_frame(filter_observations=True) assert_frame_equal(result, expected) wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. 
GH #5773 - result = wp.to_frame() + result = wp.to_frame(filter_observations=True) assert_frame_equal(result, expected[1:]) idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'), @@ -1511,7 +1511,7 @@ def test_to_frame_multi_major(self): (np.nan, 'two', 'C')], names=[None, None, 'minor']) expected.index = ex_idx - result = wp.to_frame() + result = wp.to_frame(filter_observations=True) assert_frame_equal(result, expected) def test_to_frame_multi_major_minor(self): @@ -1542,7 +1542,7 @@ def test_to_frame_multi_major_minor(self): ['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'], [-1, -1], [-2, -2], [-3, -3], [-4, -4], [-5, -5], [-6, -6], [-7, -7], [-8, -8]] - result = wp.to_frame() + result = wp.to_frame(filter_observations=True) expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) assert_frame_equal(result, expected) @@ -1550,7 +1550,7 @@ def test_to_frame_multi_drop_level(self): idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) wp = Panel({'i1': df, 'i2': df}) - result = wp.to_frame() + result = wp.to_frame(filter_observations=True) exp_idx = MultiIndex.from_tuples([(2, 'one', 'A'), (2, 'two', 'A')], names=[None, None, 'minor']) expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) @@ -1571,11 +1571,11 @@ def test_to_frame_na_drop_warnings(self): index=['foo', 'bar']) df2_with_na_vals.loc['foo', 'B'] = np.nan dict_with_dropped_vals = {'df1': df1, 'df2_dropped': df2_with_na_vals} - with tm.assert_produces_warning(RuntimeWarning): + with tm.assert_produces_warning(False): Panel(dict_with_dropped_vals).to_frame() ##if filter_observations is False, a warning shouldn't be throws - with tm.assert_produces_warning(False): - Panel(dict_with_dropped_vals).to_frame(filter_observations=False) + with tm.assert_produces_warning(RuntimeWarning): + Panel(dict_with_dropped_vals).to_frame(filter_observations=True) def test_to_panel_na_handling(self): df = 
DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), @@ -2100,14 +2100,14 @@ def setUp(self): panel = tm.makePanel() tm.add_nans(panel) - self.panel = panel.to_frame() - self.unfiltered_panel = panel.to_frame(filter_observations=False) + self.panel = panel.to_frame(filter_observations=True) + self.unfiltered_panel = panel.to_frame() def test_ops_differently_indexed(self): # trying to set non-identically indexed panel wp = self.panel.to_panel() wp2 = wp.reindex(major=wp.major_axis[:-1]) - lp2 = wp2.to_frame() + lp2 = wp2.to_frame(filter_observations=True) result = self.panel + lp2 assert_frame_equal(result.reindex(lp2.index), lp2 * 2) @@ -2218,7 +2218,7 @@ def test_truncate(self): wp2 = wp.reindex(major=new_index) - lp2 = wp2.to_frame() + lp2 = wp2.to_frame(filter_observations=True) lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2]) wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) From 2e0180d631a91e8713b275ff83c5299dba34a6bb Mon Sep 17 00:00:00 2001 From: Magellnea Date: Sat, 23 Aug 2014 00:05:22 +0300 Subject: [PATCH 6/6] Fix tests for sparse module --- pandas/sparse/tests/test_sparse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 105f661f08b10..4e5a87876fefb 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1410,7 +1410,7 @@ def _check(frame): dense_frame = frame.to_dense() wp = Panel.from_dict({'foo': frame}) - from_dense_lp = wp.to_frame() + from_dense_lp = wp.to_frame(filter_observations=True) from_sparse_lp = spf.stack_sparse_frame(frame) @@ -1629,8 +1629,8 @@ def test_to_dense(self): def test_to_frame(self): def _compare_with_dense(panel): - slp = panel.to_frame() - dlp = panel.to_dense().to_frame() + slp = panel.to_frame(filter_observations=True) + dlp = panel.to_dense().to_frame(filter_observations=True) self.assert_numpy_array_equal(slp.values, dlp.values) 
self.assertTrue(slp.index.equals(dlp.index))