diff --git a/RELEASE.rst b/RELEASE.rst index 9d862c687bcac..f03e10df1b460 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -77,8 +77,10 @@ pandas 0.11.1 dependencies offered for Linux) (GH3837_). - Plotting functions now raise a ``TypeError`` before trying to plot anything if the associated objects have have a dtype of ``object`` (GH1818_, - GH3572_). This happens before any drawing takes place which elimnates any - spurious plots from showing up. + GH3572_, GH3911_, GH3912_), but they will try to convert object arrays to + numeric arrays if possible so that you can still plot, for example, an + object array with floats. This happens before any drawing takes place which + eliminates any spurious plots from showing up. - Added Faq section on repr display options, to help users customize their setup. - ``where`` operations that result in block splitting are much faster (GH3733_) - Series and DataFrame hist methods now take a ``figsize`` argument (GH3834_) @@ -341,13 +343,13 @@ pandas 0.11.1 .. _GH3834: https://github.com/pydata/pandas/issues/3834 .. _GH3873: https://github.com/pydata/pandas/issues/3873 .. _GH3877: https://github.com/pydata/pandas/issues/3877 +.. _GH3659: https://github.com/pydata/pandas/issues/3659 +.. _GH3679: https://github.com/pydata/pandas/issues/3679 .. _GH3880: https://github.com/pydata/pandas/issues/3880 -<<<<<<< HEAD .. _GH3911: https://github.com/pydata/pandas/issues/3911 -======= .. _GH3907: https://github.com/pydata/pandas/issues/3907 ->>>>>>> 7b5933247b80174de4ba571e95a1add809dd9d09 - +.. _GH3911: https://github.com/pydata/pandas/issues/3911 +.. 
_GH3912: https://github.com/pydata/pandas/issues/3912 pandas 0.11.0 ============= diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index dd87c5ea827c3..76ae85a53102b 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -300,9 +300,11 @@ Bug Fixes ~~~~~~~~~ - Plotting functions now raise a ``TypeError`` before trying to plot anything - if the associated objects have have a ``dtype`` of ``object`` (GH1818_, - GH3572_). This happens before any drawing takes place which elimnates any - spurious plots from showing up. + if the associated objects have a dtype of ``object`` (GH1818_, + GH3572_, GH3911_, GH3912_), but they will try to convert object arrays to + numeric arrays if possible so that you can still plot, for example, an + object array with floats. This happens before any drawing takes place which + eliminates any spurious plots from showing up. - ``fillna`` methods now raise a ``TypeError`` if the ``value`` parameter is a list or tuple. @@ -416,3 +418,5 @@ on GitHub for a complete list. .. _GH3659: https://github.com/pydata/pandas/issues/3659 .. _GH3679: https://github.com/pydata/pandas/issues/3679 .. _GH3907: https://github.com/pydata/pandas/issues/3907 +.. _GH3911: https://github.com/pydata/pandas/issues/3911 +.. _GH3912: https://github.com/pydata/pandas/issues/3912 diff --git a/pandas/io/common.py b/pandas/io/common.py index 353930482c8b8..3bd6dd5d74ba8 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,6 +9,10 @@ _VALID_URLS.discard('') +class PerformanceWarning(Warning): + pass + + def _is_url(url): """Check to see if a URL has a valid protocol. 
@@ -26,27 +30,29 @@ def _is_url(url): except: return False + def _is_s3_url(url): - """ Check for an s3 url """ + """Check for an s3 url""" try: return urlparse.urlparse(url).scheme == 's3' except: return False + def get_filepath_or_buffer(filepath_or_buffer, encoding=None): - """ if the filepath_or_buffer is a url, translate and return the buffer - passthru otherwise - - Parameters - ---------- - filepath_or_buffer : a url, filepath, or buffer - encoding : the encoding to use to decode py3 bytes, default is 'utf-8' - - Returns - ------- - a filepath_or_buffer, the encoding - - """ + """ + If the filepath_or_buffer is a url, translate and return the buffer + passthru otherwise. + + Parameters + ---------- + filepath_or_buffer : a url, filepath, or buffer + encoding : the encoding to use to decode py3 bytes, default is 'utf-8' + + Returns + ------- + a filepath_or_buffer, the encoding + """ if _is_url(filepath_or_buffer): from urllib2 import urlopen diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b1b7b80e5fd23..62aa1b99dfac0 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -12,23 +12,22 @@ import warnings import numpy as np -from pandas import ( - Series, TimeSeries, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, Timestamp -) +from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index, + MultiIndex, Int64Index, Timestamp) from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.core.common import adjoin, isnull, is_list_like -from pandas.core.algorithms import match, unique, factorize +from pandas.core.common import adjoin, is_list_like +from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical -from pandas.core.common import _asarray_tuplesafe, _try_sort +from pandas.core.common import _asarray_tuplesafe from 
pandas.core.internals import BlockManager, make_block from pandas.core.reshape import block2d_to_blocknd, factor_indexer -from pandas.core.index import Int64Index, _ensure_index +from pandas.core.index import _ensure_index import pandas.core.common as com from pandas.tools.merge import concat from pandas.util import py3compat +from pandas.io.common import PerformanceWarning import pandas.lib as lib import pandas.algos as algos @@ -42,11 +41,14 @@ # PY3 encoding if we don't specify _default_encoding = 'UTF-8' + def _ensure_decoded(s): """ if we have bytes, decode them to unicde """ if isinstance(s, np.bytes_): s = s.decode('UTF-8') return s + + def _ensure_encoding(encoding): # set the encoding if we need if encoding is None: @@ -54,20 +56,31 @@ def _ensure_encoding(encoding): encoding = _default_encoding return encoding -class IncompatibilityWarning(Warning): pass + +class IncompatibilityWarning(Warning): + pass + + incompatibility_doc = """ -where criteria is being ignored as this version [%s] is too old (or not-defined), -read the file in and write it out to a new file to upgrade (with the copy_to method) +where criteria is being ignored as this version [%s] is too old (or +not-defined), read the file in and write it out to a new file to upgrade (with +the copy_to method) """ -class AttributeConflictWarning(Warning): pass + + +class AttributeConflictWarning(Warning): + pass + + attribute_conflict_doc = """ -the [%s] attribute of the existing index is [%s] which conflicts with the new [%s], -resetting the attribute to None +the [%s] attribute of the existing index is [%s] which conflicts with the new +[%s], resetting the attribute to None """ -class PerformanceWarning(Warning): pass + + performance_doc = """ -your performance may suffer as PyTables will pickle object types that it cannot map -directly to c-types [inferred_type->%s,key->%s] [items->%s] +your performance may suffer as PyTables will pickle object types that it cannot +map directly to c-types 
[inferred_type->%s,key->%s] [items->%s] """ # map object types diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 0755caf45d336..e57e5a9af2fc0 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -10,6 +10,7 @@ from pandas.util.testing import ensure_clean from pandas.core.config import set_option + import numpy as np from numpy.testing import assert_array_equal @@ -189,8 +190,7 @@ def test_bootstrap_plot(self): from pandas.tools.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, self.ts, size=10) - @slow - def test_all_invalid_plot_data(self): + def test_invalid_plot_data(self): s = Series(list('abcd')) kinds = 'line', 'bar', 'barh', 'kde', 'density' @@ -198,6 +198,14 @@ def test_all_invalid_plot_data(self): self.assertRaises(TypeError, s.plot, kind=kind) @slow + def test_valid_object_plot(self): + from pandas.io.common import PerformanceWarning + s = Series(range(10), dtype=object) + kinds = 'line', 'bar', 'barh', 'kde', 'density' + + for kind in kinds: + _check_plot_works(s.plot, kind=kind) + def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) kinds = 'line', 'bar', 'barh', 'kde', 'density' diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 83ad58c1eb41c..4e85d742e352c 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -878,15 +878,20 @@ def _get_layout(self): def _compute_plot_data(self): try: - # might be a frame + # might be an ndframe numeric_data = self.data._get_numeric_data() - except AttributeError: - # a series, but no object dtypes allowed! 
- if self.data.dtype == np.object_: - raise TypeError('invalid dtype for plotting, please cast to a ' - 'numeric dtype explicitly if you want to plot') - + except AttributeError: # TODO: rm in 0.12 (series-inherit-ndframe) numeric_data = self.data + orig_dtype = numeric_data.dtype + + # possible object array of numeric data + if orig_dtype == np.object_: + numeric_data = numeric_data.convert_objects() # soft convert + + # still an object dtype so we can't plot it + if numeric_data.dtype == np.object_: + raise TypeError('Series has object dtype and cannot be' + ' converted: no numeric data to plot') try: is_empty = numeric_data.empty @@ -895,7 +900,8 @@ def _compute_plot_data(self): # no empty frames or series allowed if is_empty: - raise TypeError('No numeric data to plot') + raise TypeError('Empty {0!r}: no numeric data to ' + 'plot'.format(numeric_data.__class__.__name__)) self.data = numeric_data diff --git a/pandas/util/testing.py b/pandas/util/testing.py index dd86862a2d551..20e59b6d3342a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -7,6 +7,7 @@ import string import sys import tempfile +import warnings from contextlib import contextmanager # contextlib is available since 2.5 @@ -39,7 +40,7 @@ def rands(n): choices = string.ascii_letters + string.digits - return ''.join([random.choice(choices) for _ in xrange(n)]) + return ''.join(random.choice(choices) for _ in xrange(n)) def randu(n): @@ -746,3 +747,48 @@ def stdin_encoding(encoding=None): sys.stdin = SimpleMock(sys.stdin, "encoding", encoding) yield sys.stdin = _stdin + + +@contextmanager +def assert_produces_warning(expected_warning=Warning, filter_level="always"): + """ + Context manager for running code that expects to raise (or not raise) + warnings. Checks that code raises the expected warning and only the + expected warning. Pass ``False`` or ``None`` to check that it does *not* + raise a warning. Defaults to ``exception.Warning``, baseclass of all + Warnings. 
(basically a wrapper around ``warnings.catch_warnings``). + + >>> import warnings + >>> with assert_produces_warning(): + ... warnings.warn(UserWarning()) + ... + >>> with assert_produces_warning(False): + ... warnings.warn(RuntimeWarning()) + ... + Traceback (most recent call last): + ... + AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. + >>> with assert_produces_warning(UserWarning): + ... warnings.warn(RuntimeWarning()) + Traceback (most recent call last): + ... + AssertionError: Did not see expected warning of class 'UserWarning'. + + .. warning:: This is *not* thread-safe. + """ + with warnings.catch_warnings(record=True) as w: + saw_warning = False + warnings.simplefilter(filter_level) + yield w + extra_warnings = [] + for actual_warning in w: + if (expected_warning and issubclass(actual_warning.category, + expected_warning)): + saw_warning = True + else: + extra_warnings.append(actual_warning.category.__name__) + if expected_warning: + assert saw_warning, ("Did not see expected warning of class %r." + % expected_warning.__name__) + assert not extra_warnings, ("Caused unexpected warning(s): %r." + % extra_warnings)