Skip to content

Commit 03d2d30

Browse files
committed
Merge pull request #3572 from cpcloud/plot-nonnumeric-data-exc-1818
ENH: plot only numeric data and raise an exception *before* plotting if there is no numeric data
2 parents 44ab793 + 1c8891e commit 03d2d30

File tree

5 files changed

+112
-55
lines changed

5 files changed

+112
-55
lines changed

RELEASE.rst

+9
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ pandas 0.11.1
6767
to specify custom column names of the returned DataFrame (GH3649_),
6868
thanks @hoechenberger
6969
- ``read_html`` no longer performs hard date conversion
70+
- Plotting functions now raise a ``TypeError`` before trying to plot anything
71+
if the associated objects have a dtype of ``object`` (GH1818_,
72+
GH3572_). This happens before any drawing takes place, which eliminates any
73+
spurious plots from showing up.
7074

7175
**API Changes**
7276

@@ -89,6 +93,9 @@ pandas 0.11.1
8993
- Raise on ``iloc`` when boolean indexing with a label based indexer mask
9094
e.g. a boolean Series, even with integer labels, will raise. Since ``iloc``
9195
is purely positional based, the labels on the Series are not alignable (GH3631_)
96+
- The ``raise_on_error`` option to plotting methods is obviated by GH3572_,
97+
so it is removed. Plots now always raise when data cannot be plotted or the
98+
object being plotted has a dtype of ``object``.
9299

93100
**Bug Fixes**
94101

@@ -227,6 +234,8 @@ pandas 0.11.1
227234
.. _GH3659: https://github.com/pydata/pandas/issues/3659
228235
.. _GH3649: https://github.com/pydata/pandas/issues/3649
229236
.. _Gh3616: https://github.com/pydata/pandas/issues/3616
237+
.. _GH1818: https://github.com/pydata/pandas/issues/1818
238+
.. _GH3572: https://github.com/pydata/pandas/issues/3572
230239

231240
pandas 0.11.0
232241
=============

doc/source/v0.11.1.txt

+15-1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ API changes
6161

6262
``df.iloc[mask]`` will raise a ``ValueError``
6363

64+
- The ``raise_on_error`` argument to plotting functions is removed. Instead,
65+
plotting functions raise a ``TypeError`` when the ``dtype`` of the object
66+
is ``object``. This is a reminder to avoid ``object`` arrays whenever
67+
possible; cast to an appropriate numeric dtype if you need to
68+
plot something.
69+
70+
6471

6572
Enhancements
6673
~~~~~~~~~~~~
@@ -118,7 +125,7 @@ Enhancements
118125

119126
The last element yielded by the iterator will be a ``Series`` containing
120127
the last element of the longest string in the ``Series`` with all other
121-
elements being ``NaN``. Here since ``'slow`` is the longest string
128+
elements being ``NaN``. Here since ``'slow'`` is the longest string
122129
and there are no other strings with the same length ``'w'`` is the only
123130
non-null string in the yielded ``Series``.
124131

@@ -158,6 +165,11 @@ Enhancements
158165
- ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name``
159166
to specify custom column names of the returned DataFrame.
160167

168+
- Plotting functions now raise a ``TypeError`` before trying to plot anything
169+
if the associated objects have a ``dtype`` of ``object`` (GH1818_,
170+
GH3572_). This happens before any drawing takes place, which eliminates any
171+
spurious plots from showing up.
172+
161173
Bug Fixes
162174
~~~~~~~~~
163175

@@ -227,3 +239,5 @@ on GitHub for a complete list.
227239
.. _GH3605: https://github.com/pydata/pandas/issues/3605
228240
.. _GH3606: https://github.com/pydata/pandas/issues/3606
229241
.. _GH3656: https://github.com/pydata/pandas/issues/3656
242+
.. _GH1818: https://github.com/pydata/pandas/issues/1818
243+
.. _GH3572: https://github.com/pydata/pandas/issues/3572

pandas/tests/test_graphics.py

+42-3
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,27 @@ def test_bootstrap_plot(self):
187187
from pandas.tools.plotting import bootstrap_plot
188188
_check_plot_works(bootstrap_plot, self.ts, size=10)
189189

190+
@slow
191+
def test_all_invalid_plot_data(self):
192+
s = Series(list('abcd'))
193+
kinds = 'line', 'bar', 'barh', 'kde', 'density'
194+
195+
for kind in kinds:
196+
self.assertRaises(TypeError, s.plot, kind=kind)
197+
198+
@slow
199+
def test_partially_invalid_plot_data(self):
200+
s = Series(['a', 'b', 1.0, 2])
201+
kinds = 'line', 'bar', 'barh', 'kde', 'density'
202+
203+
for kind in kinds:
204+
self.assertRaises(TypeError, s.plot, kind=kind)
205+
206+
@slow
207+
def test_invalid_kind(self):
208+
s = Series([1, 2])
209+
self.assertRaises(ValueError, s.plot, kind='aasdf')
210+
190211

191212
class TestDataFramePlots(unittest.TestCase):
192213

@@ -249,11 +270,9 @@ def test_nonnumeric_exclude(self):
249270
plt.close('all')
250271

251272
df = DataFrame({'A': ["x", "y", "z"], 'B': [1,2,3]})
252-
ax = df.plot(raise_on_error=False) # it works
273+
ax = df.plot()
253274
self.assert_(len(ax.get_lines()) == 1) #B was plotted
254275

255-
self.assertRaises(Exception, df.plot)
256-
257276
@slow
258277
def test_label(self):
259278
import matplotlib.pyplot as plt
@@ -688,6 +707,26 @@ def test_unordered_ts(self):
688707
ydata = ax.lines[0].get_ydata()
689708
self.assert_(np.all(ydata == np.array([1.0, 2.0, 3.0])))
690709

710+
@slow
711+
def test_all_invalid_plot_data(self):
712+
kinds = 'line', 'bar', 'barh', 'kde', 'density'
713+
df = DataFrame(list('abcd'))
714+
for kind in kinds:
715+
self.assertRaises(TypeError, df.plot, kind=kind)
716+
717+
@slow
718+
def test_partially_invalid_plot_data(self):
719+
kinds = 'line', 'bar', 'barh', 'kde', 'density'
720+
df = DataFrame(np.random.randn(10, 2), dtype=object)
721+
df[np.random.rand(df.shape[0]) > 0.5] = 'a'
722+
for kind in kinds:
723+
self.assertRaises(TypeError, df.plot, kind=kind)
724+
725+
@slow
726+
def test_invalid_kind(self):
727+
df = DataFrame(np.random.randn(10, 2))
728+
self.assertRaises(ValueError, df.plot, kind='aasdf')
729+
691730
class TestDataFrameGroupByPlots(unittest.TestCase):
692731

693732
@classmethod

pandas/tools/plotting.py

+44-45
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# being a bit too dynamic
22
# pylint: disable=E1101
3-
from itertools import izip
43
import datetime
54
import warnings
65
import re
@@ -701,10 +700,8 @@ class MPLPlot(object):
701700
"""
702701
_default_rot = 0
703702

704-
_pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog',
705-
'raise_on_error']
706-
_attr_defaults = {'logy': False, 'logx': False, 'loglog': False,
707-
'raise_on_error': True}
703+
_pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog']
704+
_attr_defaults = {'logy': False, 'logx': False, 'loglog': False}
708705

709706
def __init__(self, data, kind=None, by=None, subplots=False, sharex=True,
710707
sharey=False, use_index=True,
@@ -875,7 +872,27 @@ def _get_layout(self):
875872
return (len(self.data.columns), 1)
876873

877874
def _compute_plot_data(self):
878-
pass
875+
try:
876+
# might be a frame
877+
numeric_data = self.data._get_numeric_data()
878+
except AttributeError:
879+
# a series, but no object dtypes allowed!
880+
if self.data.dtype == np.object_:
881+
raise TypeError('invalid dtype for plotting, please cast to a '
882+
'numeric dtype explicitly if you want to plot')
883+
884+
numeric_data = self.data
885+
886+
try:
887+
is_empty = numeric_data.empty
888+
except AttributeError:
889+
is_empty = not len(numeric_data)
890+
891+
# no empty frames or series allowed
892+
if is_empty:
893+
raise TypeError('No numeric data to plot')
894+
895+
self.data = numeric_data
879896

880897
def _make_plot(self):
881898
raise NotImplementedError
@@ -1184,27 +1201,17 @@ def _make_plot(self):
11841201
else:
11851202
args = (ax, x, y, style)
11861203

1187-
try:
1188-
newline = plotf(*args, **kwds)[0]
1189-
lines.append(newline)
1190-
leg_label = label
1191-
if self.mark_right and self.on_right(i):
1192-
leg_label += ' (right)'
1193-
labels.append(leg_label)
1194-
ax.grid(self.grid)
1195-
1196-
if self._is_datetype():
1197-
left, right = _get_xlim(lines)
1198-
ax.set_xlim(left, right)
1199-
except AttributeError as inst: # non-numeric
1200-
msg = ('Unable to plot data %s vs index %s,\n'
1201-
'error was: %s' % (str(y), str(x), str(inst)))
1202-
if not self.raise_on_error:
1203-
print msg
1204-
else:
1205-
msg = msg + ('\nConsider setting raise_on_error=False'
1206-
'to suppress')
1207-
raise Exception(msg)
1204+
newline = plotf(*args, **kwds)[0]
1205+
lines.append(newline)
1206+
leg_label = label
1207+
if self.mark_right and self.on_right(i):
1208+
leg_label += ' (right)'
1209+
labels.append(leg_label)
1210+
ax.grid(self.grid)
1211+
1212+
if self._is_datetype():
1213+
left, right = _get_xlim(lines)
1214+
ax.set_xlim(left, right)
12081215

12091216
self._make_legend(lines, labels)
12101217

@@ -1223,22 +1230,12 @@ def to_leg_label(label, i):
12231230
return label
12241231

12251232
def _plot(data, col_num, ax, label, style, **kwds):
1226-
try:
1227-
newlines = tsplot(data, plotf, ax=ax, label=label,
1228-
style=style, **kwds)
1229-
ax.grid(self.grid)
1230-
lines.append(newlines[0])
1231-
leg_label = to_leg_label(label, col_num)
1232-
labels.append(leg_label)
1233-
except AttributeError as inst: #non-numeric
1234-
msg = ('Unable to plot %s,\n'
1235-
'error was: %s' % (str(data), str(inst)))
1236-
if not self.raise_on_error:
1237-
print msg
1238-
else:
1239-
msg = msg + ('\nConsider setting raise_on_error=False'
1240-
'to suppress')
1241-
raise Exception(msg)
1233+
newlines = tsplot(data, plotf, ax=ax, label=label,
1234+
style=style, **kwds)
1235+
ax.grid(self.grid)
1236+
lines.append(newlines[0])
1237+
leg_label = to_leg_label(label, col_num)
1238+
labels.append(leg_label)
12421239

12431240
if isinstance(data, Series):
12441241
ax = self._get_ax(0) # self.axes[0]
@@ -1610,8 +1607,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None,
16101607
If not passed, uses gca()
16111608
style : string, default matplotlib default
16121609
matplotlib line style to use
1613-
grid : matplot grid
1614-
legend: matplot legende
1610+
grid : matplotlib grid
1611+
legend: matplotlib legend
16151612
logx : boolean, default False
16161613
For line plots, use log scaling on x axis
16171614
logy : boolean, default False
@@ -1633,6 +1630,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None,
16331630
klass = BarPlot
16341631
elif kind == 'kde':
16351632
klass = KdePlot
1633+
else:
1634+
raise ValueError('Invalid chart type given %s' % kind)
16361635

16371636
"""
16381637
If no axis is specified, we check whether there are existing figures.

pandas/tseries/tests/test_plotting.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,14 @@ def test_nonnumeric_exclude(self):
8787

8888
idx = date_range('1/1/1987', freq='A', periods=3)
8989
df = DataFrame({'A': ["x", "y", "z"], 'B': [1,2,3]}, idx)
90-
self.assertRaises(Exception, df.plot)
9190

9291
plt.close('all')
93-
ax = df.plot(raise_on_error=False) # it works
92+
ax = df.plot() # it works
9493
self.assert_(len(ax.get_lines()) == 1) #B was plotted
9594

9695
plt.close('all')
97-
self.assertRaises(Exception, df.A.plot)
9896

99-
plt.close('all')
100-
ax = df['A'].plot(raise_on_error=False) # it works
101-
self.assert_(len(ax.get_lines()) == 0)
97+
self.assertRaises(TypeError, df['A'].plot)
10298

10399
@slow
104100
def test_tsplot(self):

0 commit comments

Comments
 (0)