From 0a83a1f8140838f8f02151dc2f8a9f10eef0a084 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 05:23:41 -0400 Subject: [PATCH 1/4] FIX/ENH: attempt soft conversion of object series before raising a TypeError when plotting --- pandas/tests/test_graphics.py | 10 ++++++++-- pandas/tools/plotting.py | 10 +++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 0755caf45d336..56b3c337162bc 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -189,8 +189,7 @@ def test_bootstrap_plot(self): from pandas.tools.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, self.ts, size=10) - @slow - def test_all_invalid_plot_data(self): + def test_invalid_plot_data(self): s = Series(list('abcd')) kinds = 'line', 'bar', 'barh', 'kde', 'density' @@ -198,6 +197,13 @@ def test_all_invalid_plot_data(self): self.assertRaises(TypeError, s.plot, kind=kind) @slow + def test_valid_object_plot(self): + s = Series(range(10), dtype=object) + kinds = 'line', 'bar', 'barh', 'kde', 'density' + + for kind in kinds: + _check_plot_works(s.plot, kind=kind) + def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) kinds = 'line', 'bar', 'barh', 'kde', 'density' diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 83ad58c1eb41c..3c7bd7fb5d704 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -881,12 +881,12 @@ def _compute_plot_data(self): # might be a frame numeric_data = self.data._get_numeric_data() except AttributeError: - # a series, but no object dtypes allowed! - if self.data.dtype == np.object_: - raise TypeError('invalid dtype for plotting, please cast to a ' - 'numeric dtype explicitly if you want to plot') + # attempt soft conversion + numeric_data = self.data.convert_objects() - numeric_data = self.data + # a series, but no object dtypes allowed! + if numeric_data.dtype == np.object_: + raise TypeError('invalid dtype for plotting') try: is_empty = numeric_data.empty From 6eeff1a7c8256d37edc74ff0542fd305b6c6afcb Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 06:05:23 -0400 Subject: [PATCH 2/4] DOC: add release notes/whatsnew --- RELEASE.rst | 14 ++++++++------ doc/source/v0.11.1.txt | 10 +++++++--- pandas/tests/test_graphics.py | 5 ++++- pandas/tools/plotting.py | 19 ++++++++++++++----- pandas/util/testing.py | 13 +++++++++++++ 5 files changed, 46 insertions(+), 15 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 9d862c687bcac..f03e10df1b460 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -77,8 +77,10 @@ pandas 0.11.1 dependencies offered for Linux) (GH3837_). - Plotting functions now raise a ``TypeError`` before trying to plot anything if the associated objects have have a dtype of ``object`` (GH1818_, - GH3572_). This happens before any drawing takes place which elimnates any - spurious plots from showing up. + GH3572_, GH3911_, GH3912_), but they will try to convert object arrays to + numeric arrays if possible so that you can still plot, for example, an + object array with floats. This happens before any drawing takes place which + elimnates any spurious plots from showing up. - Added Faq section on repr display options, to help users customize their setup. - ``where`` operations that result in block splitting are much faster (GH3733_) - Series and DataFrame hist methods now take a ``figsize`` argument (GH3834_) @@ -341,13 +343,13 @@ pandas 0.11.1 .. _GH3834: https://github.com/pydata/pandas/issues/3834 .. _GH3873: https://github.com/pydata/pandas/issues/3873 .. _GH3877: https://github.com/pydata/pandas/issues/3877 +.. _GH3659: https://github.com/pydata/pandas/issues/3659 +.. _GH3679: https://github.com/pydata/pandas/issues/3679 .. _GH3880: https://github.com/pydata/pandas/issues/3880 -<<<<<<< HEAD .. _GH3911: https://github.com/pydata/pandas/issues/3911 -======= .. _GH3907: https://github.com/pydata/pandas/issues/3907 ->>>>>>> 7b5933247b80174de4ba571e95a1add809dd9d09 - +.. _GH3911: https://github.com/pydata/pandas/issues/3911 +.. _GH3912: https://github.com/pydata/pandas/issues/3912 pandas 0.11.0 ============= diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index dd87c5ea827c3..76ae85a53102b 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -300,9 +300,11 @@ Bug Fixes ~~~~~~~~~ - Plotting functions now raise a ``TypeError`` before trying to plot anything - if the associated objects have have a ``dtype`` of ``object`` (GH1818_, - GH3572_). This happens before any drawing takes place which elimnates any - spurious plots from showing up. + if the associated objects have have a dtype of ``object`` (GH1818_, + GH3572_, GH3911_, GH3912_), but they will try to convert object arrays to + numeric arrays if possible so that you can still plot, for example, an + object array with floats. This happens before any drawing takes place which + elimnates any spurious plots from showing up. - ``fillna`` methods now raise a ``TypeError`` if the ``value`` parameter is a list or tuple. @@ -416,3 +418,5 @@ on GitHub for a complete list. .. _GH3659: https://github.com/pydata/pandas/issues/3659 .. _GH3679: https://github.com/pydata/pandas/issues/3679 .. _GH3907: https://github.com/pydata/pandas/issues/3907 +.. _GH3911: https://github.com/pydata/pandas/issues/3911 +.. _GH3912: https://github.com/pydata/pandas/issues/3912 diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 56b3c337162bc..4851ea975bebe 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -10,6 +10,7 @@ from pandas.util.testing import ensure_clean from pandas.core.config import set_option + import numpy as np from numpy.testing import assert_array_equal @@ -198,11 +199,13 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): + from pandas.io.pytables import PerformanceWarning s = Series(range(10), dtype=object) kinds = 'line', 'bar', 'barh', 'kde', 'density' for kind in kinds: - _check_plot_works(s.plot, kind=kind) + tm.assert_warns(PerformanceWarning, _check_plot_works, s.plot, + kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 3c7bd7fb5d704..51bad322f358d 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -877,16 +877,25 @@ def _get_layout(self): return (len(self.data.columns), 1) def _compute_plot_data(self): + from pandas.io.pytables import PerformanceWarning try: # might be a frame numeric_data = self.data._get_numeric_data() except AttributeError: - # attempt soft conversion - numeric_data = self.data.convert_objects() + numeric_data = self.data + orig_dtype = numeric_data.dtype - # a series, but no object dtypes allowed! - if numeric_data.dtype == np.object_: - raise TypeError('invalid dtype for plotting') + if orig_dtype == np.object_: + # attempt soft conversion, but raise a perf warning + numeric_data = numeric_data.convert_objects() + num_data_dtype = numeric_data.dtype + + if num_data_dtype == np.object_: + raise TypeError('No numeric data to plot') + else: + warnings.warn('Coerced object dtype to numeric dtype, ' + 'you should avoid object dtyped Series if ' + 'possible', PerformanceWarning) try: is_empty = numeric_data.empty diff --git a/pandas/util/testing.py b/pandas/util/testing.py index dd86862a2d551..66fc7d66fcfae 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -7,6 +7,7 @@ import string import sys import tempfile +import warnings from contextlib import contextmanager # contextlib is available since 2.5 @@ -746,3 +747,15 @@ def stdin_encoding(encoding=None): sys.stdin = SimpleMock(sys.stdin, "encoding", encoding) yield sys.stdin = _stdin + + +def assert_warns(warning, f, *args, **kwargs): + """ + From: http://stackoverflow.com/questions/3892218/how-to-test-with-pythons-unittest-that-a-warning-has-been-thrown + """ + with warnings.catch_warnings(record=True) as warning_list: + warnings.simplefilter('always') + f(*args, **kwargs) + msg = '{0!r} not raised'.format(warning) + assert any(issubclass(item.category, warning) + for item in warning_list), msg From 7e4b27f8e8e938b50409c35ae67025cf2f737b89 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 14:07:00 -0400 Subject: [PATCH 3/4] CLN: move PerformanceWarning to io.common and clean up --- pandas/io/common.py | 34 ++++++++++++++++++------------- pandas/io/pytables.py | 47 +++++++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 353930482c8b8..3bd6dd5d74ba8 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,6 +9,10 @@ _VALID_URLS.discard('') +class PerformanceWarning(Warning): + pass + + def _is_url(url): """Check to see if a URL has a valid protocol. @@ -26,27 +30,29 @@ def _is_url(url): except: return False + def _is_s3_url(url): - """ Check for an s3 url """ + """Check for an s3 url""" try: return urlparse.urlparse(url).scheme == 's3' except: return False + def get_filepath_or_buffer(filepath_or_buffer, encoding=None): - """ if the filepath_or_buffer is a url, translate and return the buffer - passthru otherwise - - Parameters - ---------- - filepath_or_buffer : a url, filepath, or buffer - encoding : the encoding to use to decode py3 bytes, default is 'utf-8' - - Returns - ------- - a filepath_or_buffer, the encoding - - """ + """ + If the filepath_or_buffer is a url, translate and return the buffer + passthru otherwise. + + Parameters + ---------- + filepath_or_buffer : a url, filepath, or buffer + encoding : the encoding to use to decode py3 bytes, default is 'utf-8' + + Returns + ------- + a filepath_or_buffer, the encoding + """ if _is_url(filepath_or_buffer): from urllib2 import urlopen diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b1b7b80e5fd23..62aa1b99dfac0 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -12,23 +12,22 @@ import warnings import numpy as np -from pandas import ( - Series, TimeSeries, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, Timestamp -) +from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index, + MultiIndex, Int64Index, Timestamp) from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.core.common import adjoin, isnull, is_list_like -from pandas.core.algorithms import match, unique, factorize +from pandas.core.common import adjoin, is_list_like +from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical -from pandas.core.common import _asarray_tuplesafe, _try_sort +from pandas.core.common import _asarray_tuplesafe from pandas.core.internals import BlockManager, make_block from pandas.core.reshape import block2d_to_blocknd, factor_indexer -from pandas.core.index import Int64Index, _ensure_index +from pandas.core.index import _ensure_index import pandas.core.common as com from pandas.tools.merge import concat from pandas.util import py3compat +from pandas.io.common import PerformanceWarning import pandas.lib as lib import pandas.algos as algos @@ -42,11 +41,14 @@ # PY3 encoding if we don't specify _default_encoding = 'UTF-8' + def _ensure_decoded(s): """ if we have bytes, decode them to unicde """ if isinstance(s, np.bytes_): s = s.decode('UTF-8') return s + + def _ensure_encoding(encoding): # set the encoding if we need if encoding is None: @@ -54,20 +56,31 @@ def _ensure_encoding(encoding): encoding = _default_encoding return encoding -class IncompatibilityWarning(Warning): pass + +class IncompatibilityWarning(Warning): + pass + + incompatibility_doc = """ -where criteria is being ignored as this version [%s] is too old (or not-defined), -read the file in and write it out to a new file to upgrade (with the copy_to method) +where criteria is being ignored as this version [%s] is too old (or +not-defined), read the file in and write it out to a new file to upgrade (with +the copy_to method) """ -class AttributeConflictWarning(Warning): pass + + +class AttributeConflictWarning(Warning): + pass + + attribute_conflict_doc = """ -the [%s] attribute of the existing index is [%s] which conflicts with the new [%s], -resetting the attribute to None +the [%s] attribute of the existing index is [%s] which conflicts with the new +[%s], resetting the attribute to None """ -class PerformanceWarning(Warning): pass + + performance_doc = """ -your performance may suffer as PyTables will pickle object types that it cannot map -directly to c-types [inferred_type->%s,key->%s] [items->%s] +your performance may suffer as PyTables will pickle object types that it cannot +map directly to c-types [inferred_type->%s,key->%s] [items->%s] """ # map object types From cc947088ace30bd7371367e5acbd8aeb0812c1ed Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 14:08:37 -0400 Subject: [PATCH 4/4] ENH/CLN: use @jtratner's decorator to check for raised warnings --- pandas/tests/test_graphics.py | 5 ++-- pandas/tools/plotting.py | 23 +++++++--------- pandas/util/testing.py | 51 ++++++++++++++++++++++++++++------- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 4851ea975bebe..e57e5a9af2fc0 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -199,13 +199,12 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): - from pandas.io.pytables import PerformanceWarning + from pandas.io.common import PerformanceWarning s = Series(range(10), dtype=object) kinds = 'line', 'bar', 'barh', 'kde', 'density' for kind in kinds: - tm.assert_warns(PerformanceWarning, _check_plot_works, s.plot, - kind=kind) + _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 51bad322f358d..4e85d742e352c 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -877,25 +877,21 @@ def _get_layout(self): return (len(self.data.columns), 1) def _compute_plot_data(self): - from pandas.io.pytables import PerformanceWarning try: - # might be a frame + # might be an ndframe numeric_data = self.data._get_numeric_data() - except AttributeError: + except AttributeError: # TODO: rm in 0.12 (series-inherit-ndframe) numeric_data = self.data orig_dtype = numeric_data.dtype + # possible object array of numeric data if orig_dtype == np.object_: - # attempt soft conversion, but raise a perf warning - numeric_data = numeric_data.convert_objects() - num_data_dtype = numeric_data.dtype + numeric_data = numeric_data.convert_objects() # soft convert - if num_data_dtype == np.object_: - raise TypeError('No numeric data to plot') - else: - warnings.warn('Coerced object dtype to numeric dtype, ' - 'you should avoid object dtyped Series if ' - 'possible', PerformanceWarning) + # still an object dtype so we can't plot it + if numeric_data.dtype == np.object_: + raise TypeError('Series has object dtype and cannot be' + ' converted: no numeric data to plot') try: is_empty = numeric_data.empty @@ -904,7 +900,8 @@ def _compute_plot_data(self): # no empty frames or series allowed if is_empty: - raise TypeError('No numeric data to plot') + raise TypeError('Empty {0!r}: no numeric data to ' + 'plot'.format(numeric_data.__class__.__name__)) self.data = numeric_data diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 66fc7d66fcfae..20e59b6d3342a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -40,7 +40,7 @@ def rands(n): choices = string.ascii_letters + string.digits - return ''.join([random.choice(choices) for _ in xrange(n)]) + return ''.join(random.choice(choices) for _ in xrange(n)) def randu(n): @@ -749,13 +749,46 @@ def stdin_encoding(encoding=None): sys.stdin = _stdin -def assert_warns(warning, f, *args, **kwargs): +@contextmanager +def assert_produces_warning(expected_warning=Warning, filter_level="always"): """ - From: http://stackoverflow.com/questions/3892218/how-to-test-with-pythons-unittest-that-a-warning-has-been-thrown + Context manager for running code that expects to raise (or not raise) + warnings. Checks that code raises the expected warning and only the + expected warning. Pass ``False`` or ``None`` to check that it does *not* + raise a warning. Defaults to ``exception.Warning``, baseclass of all + Warnings. (basically a wrapper around ``warnings.catch_warnings``). + + >>> import warnings + >>> with assert_produces_warning(): + ... warnings.warn(UserWarning()) + ... + >>> with assert_produces_warning(False): + ... warnings.warn(RuntimeWarning()) + ... + Traceback (most recent call last): + ... + AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. + >>> with assert_produces_warning(UserWarning): + ... warnings.warn(RuntimeWarning()) + Traceback (most recent call last): + ... + AssertionError: Did not see expected warning of class 'UserWarning'. + + ..warn:: This is *not* thread-safe. """ - with warnings.catch_warnings(record=True) as warning_list: - warnings.simplefilter('always') - f(*args, **kwargs) - msg = '{0!r} not raised'.format(warning) - assert any(issubclass(item.category, warning) - for item in warning_list), msg + with warnings.catch_warnings(record=True) as w: + saw_warning = False + warnings.simplefilter(filter_level) + yield w + extra_warnings = [] + for actual_warning in w: + if (expected_warning and issubclass(actual_warning.category, + expected_warning)): + saw_warning = True + else: + extra_warnings.append(actual_warning.category.__name__) + if expected_warning: + assert saw_warning, ("Did not see expected warning of class %r." + % expected_warning.__name__) + assert not extra_warnings, ("Caused unexpected warning(s): %r." + % extra_warnings)