Skip to content

ENH: plot only numeric data and raise an exception *before* plotting if there is no numeric data #3572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 21, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ pandas 0.11.1
to specify custom column names of the returned DataFrame (GH3649_),
thanks @hoechenberger
- ``read_html`` no longer performs hard date conversion
- Plotting functions now raise a ``TypeError`` before trying to plot anything
if the associated objects have a dtype of ``object`` (GH1818_,
GH3572_). This happens before any drawing takes place, which eliminates any
spurious plots from showing up.

**API Changes**

Expand All @@ -89,6 +93,9 @@ pandas 0.11.1
- Raise on ``iloc`` when boolean indexing with a label based indexer mask
e.g. a boolean Series, even with integer labels, will raise. Since ``iloc``
is purely positional based, the labels on the Series are not alignable (GH3631_)
- The ``raise_on_error`` option to plotting methods is obviated by GH3572_,
so it is removed. Plots now always raise when data cannot be plotted or the
object being plotted has a dtype of ``object``.

**Bug Fixes**

Expand Down Expand Up @@ -227,6 +234,8 @@ pandas 0.11.1
.. _GH3659: https://github.com/pydata/pandas/issues/3659
.. _GH3649: https://github.com/pydata/pandas/issues/3649
.. _Gh3616: https://github.com/pydata/pandas/issues/3616
.. _GH1818: https://github.com/pydata/pandas/issues/1818
.. _GH3572: https://github.com/pydata/pandas/issues/3572

pandas 0.11.0
=============
Expand Down
16 changes: 15 additions & 1 deletion doc/source/v0.11.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ API changes

``df.iloc[mask]`` will raise a ``ValueError``

- The ``raise_on_error`` argument to plotting functions is removed. Instead,
plotting functions raise a ``TypeError`` when the ``dtype`` of the object
is ``object``, as a reminder to avoid ``object`` arrays whenever possible.
Cast to an appropriate numeric dtype if you need to
plot something.



Enhancements
~~~~~~~~~~~~
Expand Down Expand Up @@ -118,7 +125,7 @@ Enhancements

The last element yielded by the iterator will be a ``Series`` containing
the last element of the longest string in the ``Series`` with all other
elements being ``NaN``. Here since ``'slow`` is the longest string
elements being ``NaN``. Here since ``'slow'`` is the longest string
and there are no other strings with the same length ``'w'`` is the only
non-null string in the yielded ``Series``.

Expand Down Expand Up @@ -158,6 +165,11 @@ Enhancements
- ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name``
to specify custom column names of the returned DataFrame.

- Plotting functions now raise a ``TypeError`` before trying to plot anything
if the associated objects have a ``dtype`` of ``object`` (GH1818_,
GH3572_). This happens before any drawing takes place, which eliminates any
spurious plots from showing up.

Bug Fixes
~~~~~~~~~

Expand Down Expand Up @@ -227,3 +239,5 @@ on GitHub for a complete list.
.. _GH3605: https://github.com/pydata/pandas/issues/3605
.. _GH3606: https://github.com/pydata/pandas/issues/3606
.. _GH3656: https://github.com/pydata/pandas/issues/3656
.. _GH1818: https://github.com/pydata/pandas/issues/1818
.. _GH3572: https://github.com/pydata/pandas/issues/3572
45 changes: 42 additions & 3 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,27 @@ def test_bootstrap_plot(self):
from pandas.tools.plotting import bootstrap_plot
_check_plot_works(bootstrap_plot, self.ts, size=10)

@slow
def test_all_invalid_plot_data(self):
    # A Series holding only strings must be rejected with TypeError for
    # each of the plot kinds listed below.
    letters = Series(list('abcd'))

    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        self.assertRaises(TypeError, letters.plot, kind=plot_kind)

@slow
def test_partially_invalid_plot_data(self):
    # A Series mixing strings with numbers must also raise TypeError for
    # each of the plot kinds listed below.
    mixed = Series(['a', 'b', 1.0, 2])

    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        self.assertRaises(TypeError, mixed.plot, kind=plot_kind)

@slow
def test_invalid_kind(self):
    # The data here is numeric, so the only thing wrong with the call is
    # the unrecognised ``kind``; that must surface as ValueError.
    numbers = Series([1, 2])
    self.assertRaises(ValueError, numbers.plot, kind='aasdf')


class TestDataFramePlots(unittest.TestCase):

Expand Down Expand Up @@ -249,11 +270,9 @@ def test_nonnumeric_exclude(self):
plt.close('all')

df = DataFrame({'A': ["x", "y", "z"], 'B': [1,2,3]})
ax = df.plot(raise_on_error=False) # it works
ax = df.plot()
self.assert_(len(ax.get_lines()) == 1) #B was plotted

self.assertRaises(Exception, df.plot)

@slow
def test_label(self):
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -688,6 +707,26 @@ def test_unordered_ts(self):
ydata = ax.lines[0].get_ydata()
self.assert_(np.all(ydata == np.array([1.0, 2.0, 3.0])))

@slow
def test_all_invalid_plot_data(self):
    # A frame built purely from strings must raise TypeError for each of
    # the plot kinds listed below.
    frame = DataFrame(list('abcd'))

    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        self.assertRaises(TypeError, frame.plot, kind=plot_kind)

@slow
def test_partially_invalid_plot_data(self):
    # Start from random floats stored with object dtype, then overwrite
    # the entries picked by a random boolean mask (one flag per
    # ``frame.shape[0]``) with the string 'a'.
    frame = DataFrame(np.random.randn(10, 2), dtype=object)
    mask = np.random.rand(frame.shape[0]) > 0.5
    frame[mask] = 'a'

    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        self.assertRaises(TypeError, frame.plot, kind=plot_kind)

@slow
def test_invalid_kind(self):
    # Numeric data with an unrecognised ``kind`` must raise ValueError.
    frame = DataFrame(np.random.randn(10, 2))
    self.assertRaises(ValueError, frame.plot, kind='aasdf')

class TestDataFrameGroupByPlots(unittest.TestCase):

@classmethod
Expand Down
89 changes: 44 additions & 45 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# being a bit too dynamic
# pylint: disable=E1101
from itertools import izip
import datetime
import warnings
import re
Expand Down Expand Up @@ -701,10 +700,8 @@ class MPLPlot(object):
"""
_default_rot = 0

_pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog',
'raise_on_error']
_attr_defaults = {'logy': False, 'logx': False, 'loglog': False,
'raise_on_error': True}
_pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog']
_attr_defaults = {'logy': False, 'logx': False, 'loglog': False}

def __init__(self, data, kind=None, by=None, subplots=False, sharex=True,
sharey=False, use_index=True,
Expand Down Expand Up @@ -875,7 +872,27 @@ def _get_layout(self):
return (len(self.data.columns), 1)

def _compute_plot_data(self):
pass
try:
# might be a frame
numeric_data = self.data._get_numeric_data()
except AttributeError:
# a series, but no object dtypes allowed!
if self.data.dtype == np.object_:
raise TypeError('invalid dtype for plotting, please cast to a '
'numeric dtype explicitly if you want to plot')

numeric_data = self.data

try:
is_empty = numeric_data.empty
except AttributeError:
is_empty = not len(numeric_data)

# no empty frames or series allowed
if is_empty:
raise TypeError('No numeric data to plot')

self.data = numeric_data

def _make_plot(self):
raise NotImplementedError
Expand Down Expand Up @@ -1184,27 +1201,17 @@ def _make_plot(self):
else:
args = (ax, x, y, style)

try:
newline = plotf(*args, **kwds)[0]
lines.append(newline)
leg_label = label
if self.mark_right and self.on_right(i):
leg_label += ' (right)'
labels.append(leg_label)
ax.grid(self.grid)

if self._is_datetype():
left, right = _get_xlim(lines)
ax.set_xlim(left, right)
except AttributeError as inst: # non-numeric
msg = ('Unable to plot data %s vs index %s,\n'
'error was: %s' % (str(y), str(x), str(inst)))
if not self.raise_on_error:
print msg
else:
msg = msg + ('\nConsider setting raise_on_error=False'
'to suppress')
raise Exception(msg)
newline = plotf(*args, **kwds)[0]
lines.append(newline)
leg_label = label
if self.mark_right and self.on_right(i):
leg_label += ' (right)'
labels.append(leg_label)
ax.grid(self.grid)

if self._is_datetype():
left, right = _get_xlim(lines)
ax.set_xlim(left, right)

self._make_legend(lines, labels)

Expand All @@ -1223,22 +1230,12 @@ def to_leg_label(label, i):
return label

def _plot(data, col_num, ax, label, style, **kwds):
try:
newlines = tsplot(data, plotf, ax=ax, label=label,
style=style, **kwds)
ax.grid(self.grid)
lines.append(newlines[0])
leg_label = to_leg_label(label, col_num)
labels.append(leg_label)
except AttributeError as inst: #non-numeric
msg = ('Unable to plot %s,\n'
'error was: %s' % (str(data), str(inst)))
if not self.raise_on_error:
print msg
else:
msg = msg + ('\nConsider setting raise_on_error=False'
'to suppress')
raise Exception(msg)
newlines = tsplot(data, plotf, ax=ax, label=label,
style=style, **kwds)
ax.grid(self.grid)
lines.append(newlines[0])
leg_label = to_leg_label(label, col_num)
labels.append(leg_label)

if isinstance(data, Series):
ax = self._get_ax(0) # self.axes[0]
Expand Down Expand Up @@ -1610,8 +1607,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None,
If not passed, uses gca()
style : string, default matplotlib default
matplotlib line style to use
grid : matplot grid
legend: matplot legende
grid : matplotlib grid
legend: matplotlib legend
logx : boolean, default False
For line plots, use log scaling on x axis
logy : boolean, default False
Expand All @@ -1633,6 +1630,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None,
klass = BarPlot
elif kind == 'kde':
klass = KdePlot
else:
raise ValueError('Invalid chart type given %s' % kind)

"""
If no axis is specified, we check whether there are existing figures.
Expand Down
8 changes: 2 additions & 6 deletions pandas/tseries/tests/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,18 +87,14 @@ def test_nonnumeric_exclude(self):

idx = date_range('1/1/1987', freq='A', periods=3)
df = DataFrame({'A': ["x", "y", "z"], 'B': [1,2,3]}, idx)
self.assertRaises(Exception, df.plot)

plt.close('all')
ax = df.plot(raise_on_error=False) # it works
ax = df.plot() # it works
self.assert_(len(ax.get_lines()) == 1) #B was plotted

plt.close('all')
self.assertRaises(Exception, df.A.plot)

plt.close('all')
ax = df['A'].plot(raise_on_error=False) # it works
self.assert_(len(ax.get_lines()) == 0)
self.assertRaises(TypeError, df['A'].plot)

@slow
def test_tsplot(self):
Expand Down