Skip to content

Adding a warning when dropping NA values for panel.to_frame #7879 #8063

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas import compat
import sys
import numpy as np
import warnings
from pandas.core.common import (PandasError, _try_sort, _default_index,
_infer_dtype_from_scalar, notnull)
from pandas.core.categorical import Categorical
Expand Down Expand Up @@ -38,6 +39,9 @@
"of\n %s" %
_shared_doc_kwargs['axes_single_arg'])

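# Always emit RuntimeWarning instead of only once per location, so the
# NaN-drop warning raised in to_frame is repeated on every call.
# NOTE: this changes the process-wide warning filter at import time.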
warnings.simplefilter('always', RuntimeWarning)


def _ensure_like_indices(time, panels):
"""
Expand Down Expand Up @@ -835,7 +839,7 @@ def groupby(self, function, axis='major'):
axis = self._get_axis_number(axis)
return PanelGroupBy(self, function, axis=axis)

def to_frame(self, filter_observations=True):
def to_frame(self, filter_observations=False):
"""
Transform wide format into long (stacked) format as DataFrame whose
columns are the Panel's items and whose index is a MultiIndex formed
Expand All @@ -858,6 +862,8 @@ def to_frame(self, filter_observations=True):
mask = com.notnull(self.values).all(axis=0)
# size = mask.sum()
selector = mask.ravel()
if not np.all(selector):
warnings.warn("NaN values found, empty values will be dropped", RuntimeWarning)
else:
# size = N * K
selector = slice(None, None)
Expand Down
6 changes: 3 additions & 3 deletions pandas/sparse/tests/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,7 +1410,7 @@ def _check(frame):
dense_frame = frame.to_dense()

wp = Panel.from_dict({'foo': frame})
from_dense_lp = wp.to_frame()
from_dense_lp = wp.to_frame(filter_observations=True)

from_sparse_lp = spf.stack_sparse_frame(frame)

Expand Down Expand Up @@ -1629,8 +1629,8 @@ def test_to_dense(self):

def test_to_frame(self):
def _compare_with_dense(panel):
slp = panel.to_frame()
dlp = panel.to_dense().to_frame()
slp = panel.to_frame(filter_observations=True)
dlp = panel.to_dense().to_frame(filter_observations=True)

self.assert_numpy_array_equal(slp.values, dlp.values)
self.assertTrue(slp.index.equals(dlp.index))
Expand Down
49 changes: 35 additions & 14 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# pylint: disable=W0612,E1101

from datetime import datetime
import warnings
import operator
import nose

import numpy as np

from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex
from pandas.core.datetools import bday
from pandas.core.panel import Panel
Expand Down Expand Up @@ -440,7 +440,7 @@ def test_delitem_and_pop(self):

def test_setitem(self):
# LongPanel with one item
lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
lp = self.panel.filter(['ItemA', 'ItemB']).to_frame(filter_observations=True)
with tm.assertRaises(ValueError):
self.panel['ItemE'] = lp

Expand Down Expand Up @@ -1436,12 +1436,12 @@ def test_transpose_copy(self):

def test_to_frame(self):
# filtered
filtered = self.panel.to_frame()
expected = self.panel.to_frame().dropna(how='any')
filtered = self.panel.to_frame(filter_observations=True)
expected = self.panel.to_frame(filter_observations=True).dropna(how='any')
assert_frame_equal(filtered, expected)

# unfiltered
unfiltered = self.panel.to_frame(filter_observations=False)
unfiltered = self.panel.to_frame()
assert_panel_equal(unfiltered.to_panel(), self.panel)

# names
Expand Down Expand Up @@ -1492,11 +1492,11 @@ def test_to_frame_multi_major(self):
expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, 'c', 1, 4, 'd', 1],
'i2': [1, 'a', 1, 2, 'b', 1, 3, 'c', 1, 4, 'd', 1]},
index=expected_idx)
result = wp.to_frame()
result = wp.to_frame(filter_observations=True)
assert_frame_equal(result, expected)

wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. GH #5773
result = wp.to_frame()
result = wp.to_frame(filter_observations=True)
assert_frame_equal(result, expected[1:])

idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'),
Expand All @@ -1511,7 +1511,7 @@ def test_to_frame_multi_major(self):
(np.nan, 'two', 'C')],
names=[None, None, 'minor'])
expected.index = ex_idx
result = wp.to_frame()
result = wp.to_frame(filter_observations=True)
assert_frame_equal(result, expected)

def test_to_frame_multi_major_minor(self):
Expand Down Expand Up @@ -1542,20 +1542,41 @@ def test_to_frame_multi_major_minor(self):
['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'], [-1, -1],
[-2, -2], [-3, -3], [-4, -4], [-5, -5], [-6, -6], [-7, -7],
[-8, -8]]
result = wp.to_frame()
result = wp.to_frame(filter_observations=True)
expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx)
assert_frame_equal(result, expected)

def test_to_frame_multi_drop_level(self):
idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')])
df = DataFrame({'A': [np.nan, 1, 2]}, index=idx)
wp = Panel({'i1': df, 'i2': df})
result = wp.to_frame()
result = wp.to_frame(filter_observations=True)
exp_idx = MultiIndex.from_tuples([(2, 'one', 'A'), (2, 'two', 'A')],
names=[None, None, 'minor'])
expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx)
assert_frame_equal(result, expected)

def test_to_frame_na_drop_warnings(self):
# Checks when Panel.to_frame emits a RuntimeWarning about NaN values.
df1 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'],
index=['foo', 'bar'])
df2 = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'],
index=['foo', 'bar'])
dict_without_dropped_vals = {'df1': df1, 'df2': df2}
# A panel without NaN values shouldn't produce any warning
with tm.assert_produces_warning(False):
Panel(dict_without_dropped_vals).to_frame()
# Build a second panel in which one frame contains a NaN value
df2_with_na_vals = DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'],
index=['foo', 'bar'])
df2_with_na_vals.loc['foo', 'B'] = np.nan
dict_with_dropped_vals = {'df1': df1, 'df2_dropped': df2_with_na_vals}
# Called without filter_observations, no warning is expected even though
# the panel contains a NaN
with tm.assert_produces_warning(False):
Panel(dict_with_dropped_vals).to_frame()
# With filter_observations=True a RuntimeWarning is expected
with tm.assert_produces_warning(RuntimeWarning):
Panel(dict_with_dropped_vals).to_frame(filter_observations=True)

def test_to_panel_na_handling(self):
df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)),
index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
Expand Down Expand Up @@ -2079,14 +2100,14 @@ def setUp(self):
panel = tm.makePanel()
tm.add_nans(panel)

self.panel = panel.to_frame()
self.unfiltered_panel = panel.to_frame(filter_observations=False)
self.panel = panel.to_frame(filter_observations=True)
self.unfiltered_panel = panel.to_frame()

def test_ops_differently_indexed(self):
# trying to set non-identically indexed panel
wp = self.panel.to_panel()
wp2 = wp.reindex(major=wp.major_axis[:-1])
lp2 = wp2.to_frame()
lp2 = wp2.to_frame(filter_observations=True)

result = self.panel + lp2
assert_frame_equal(result.reindex(lp2.index), lp2 * 2)
Expand Down Expand Up @@ -2197,7 +2218,7 @@ def test_truncate(self):

wp2 = wp.reindex(major=new_index)

lp2 = wp2.to_frame()
lp2 = wp2.to_frame(filter_observations=True)
lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2])

wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2])
Expand Down