Merge branch 'master' into fix-14763

jeffcarey · jeffcarey · commit 4d3ca3a92180 · 2016-12-09T17:43:50.000-08:00
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
@@ -55,7 +55,7 @@ if [ x"$DOC_BUILD" != x"" ]; then
     touch .nojekyll
     git add --all .
     git commit -m "Version" --allow-empty
-    git remote add origin "https://$GH_TOKEN@github.com/pandas-docs/pandas-docs-travis"
+    git remote add origin "https://pandas-docs:$GH_TOKEN@github.com/pandas-docs/pandas-docs-travis"
     git push origin gh-pages -f
 fi
 
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
@@ -528,12 +528,14 @@ return a copy of the data rather than a view:
    jim joe
    1   z    0.64094
 
+.. _advanced.unsorted:
+
 Furthermore if you try to index something that is not fully lexsorted, this can raise:
 
 .. code-block:: ipython
 
     In [5]: dfm.loc[(0,'y'):(1, 'z')]
-    KeyError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
+    UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
 
 The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and the ``lexsort_depth`` property returns the sort depth:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -50,9 +50,15 @@ Other enhancements
 - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`)
 
 - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
+
+- New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
+  unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack
+  of sorting or an incorrect key. See :ref:`here <advanced.unsorted>`.
+ 
 - ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (issue:`14714`)
 - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
 - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value  (:issue:`14154`)
+- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
 
 .. _whatsnew_0200.api_breaking:
 
@@ -70,6 +76,9 @@ Backwards incompatible API changes
 Other API Changes
 ^^^^^^^^^^^^^^^^^
 
+- Change error message text when indexing via a
+  boolean ``Series`` that has an incompatible index (:issue:`14491`)
+
 .. _whatsnew_0200.deprecations:
 
 Deprecations
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -97,6 +97,16 @@ class UnsupportedFunctionCall(ValueError):
     pass
 
 
+class UnsortedIndexError(KeyError):
+    """ Error raised when attempting to get a slice of a MultiIndex
+    and the index has not been lexsorted. Subclass of `KeyError`.
+
+    .. versionadded:: 0.20.0
+
+    """
+    pass
+
+
 class AbstractMethodError(NotImplementedError):
     """Raise this error instead of NotImplementedError for abstract methods
     while keeping compatibility with Python 2 and Python 3.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2483,7 +2483,7 @@ def assign(self, **kwargs):
         Notes
         -----
         Since ``kwargs`` is a dictionary, the order of your
-        arguments may not be preserved. The make things predicatable,
+        arguments may not be preserved. To make things predictable,
         the columns are inserted in alphabetical order, at the end of
         your DataFrame. Assigning multiple columns within the same
         ``assign`` is possible, but you cannot reference other columns
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3635,14 +3635,17 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
               require that you also specify an `order` (int),
               e.g. df.interpolate(method='polynomial', order=4).
               These use the actual numerical values of the index.
-            * 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' are all
-              wrappers around the scipy interpolation methods of similar
-              names. These use the actual numerical values of the index. See
-              the scipy documentation for more on their behavior
-              `here <http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__  # noqa
-              `and here <http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html>`__  # noqa
+            * 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'
+              are all wrappers around the scipy interpolation methods of
+              similar names. These use the actual numerical values of the
+              index. For more information on their behavior, see the
+              `scipy documentation
+              <http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__
+              and `tutorial documentation
+              <http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html>`__
             * 'from_derivatives' refers to BPoly.from_derivatives which
-              replaces 'piecewise_polynomial' interpolation method in scipy 0.18
+              replaces 'piecewise_polynomial' interpolation method in
+              scipy 0.18
 
             .. versionadded:: 0.18.1
 
@@ -3656,7 +3659,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
             * 1: fill row-by-row
         limit : int, default None.
             Maximum number of consecutive NaNs to fill.
-        limit_direction : {'forward', 'backward', 'both'}, defaults to 'forward'
+        limit_direction : {'forward', 'backward', 'both'}, default 'forward'
             If limit is specified, consecutive NaNs will be filled in this
             direction.
 
@@ -4159,6 +4162,9 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
 
             .. versionadded:: 0.19.0
 
+        Notes
+        -----
+
         To learn more about the offset strings, please see `this link
         <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
 
@@ -4346,7 +4352,7 @@ def rank(self, axis=0, method='average', numeric_only=None,
 
         Parameters
         ----------
-        axis: {0 or 'index', 1 or 'columns'}, default 0
+        axis : {0 or 'index', 1 or 'columns'}, default 0
             index to direct ranking
         method : {'average', 'min', 'max', 'first', 'dense'}
             * average: average rank of group
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1814,7 +1814,9 @@ def check_bool_indexer(ax, key):
         result = result.reindex(ax)
         mask = isnull(result._values)
         if mask.any():
-            raise IndexingError('Unalignable boolean Series key provided')
+            raise IndexingError('Unalignable boolean Series provided as '
+                                'indexer (index of the boolean Series and of '
+                                'the indexed object do not match')
         result = result.astype(bool)._values
     elif is_sparse(result):
         result = result.to_dense()
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1006,7 +1006,7 @@ def wrapper(self, other):
 
 Parameters
 ----------
-other: Series or scalar value
+other : Series or scalar value
 fill_value : None or float value, default None (NaN)
     Fill missing (NaN) values with this value. If both Series are
     missing, the result will be missing
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2033,9 +2033,9 @@ def reorder_levels(self, order):
 
         Parameters
         ----------
-        order: list of int representing new level order.
+        order : list of int representing new level order.
                (reference level by number or key)
-        axis: where to reorder levels
+        axis : where to reorder levels
 
         Returns
         -------
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
@@ -25,7 +25,8 @@
 from pandas.core.common import (_values_from_object,
                                 is_bool_indexer,
                                 is_null_slice,
-                                PerformanceWarning)
+                                PerformanceWarning,
+                                UnsortedIndexError)
 
 
 from pandas.core.base import FrozenList
@@ -1936,9 +1937,10 @@ def get_locs(self, tup):
 
         # must be lexsorted to at least as many levels
         if not self.is_lexsorted_for_tuple(tup):
-            raise KeyError('MultiIndex Slicing requires the index to be fully '
-                           'lexsorted tuple len ({0}), lexsort depth '
-                           '({1})'.format(len(tup), self.lexsort_depth))
+            raise UnsortedIndexError('MultiIndex Slicing requires the index '
+                                     'to be fully lexsorted tuple len ({0}), '
+                                     'lexsort depth ({1})'
+                                     .format(len(tup), self.lexsort_depth))
 
         # indexer
         # this is the list of all values that we want to select
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
@@ -1453,7 +1453,7 @@ def test_as_recarray(self):
                 FutureWarning, check_stacklevel=False):
             data = 'a,b\n1,a\n2,b'
             expected = np.array([(1, 'a'), (2, 'b')],
-                                dtype=[('a', '<i8'), ('b', 'O')])
+                                dtype=[('a', '=i8'), ('b', 'O')])
             out = self.read_csv(StringIO(data), as_recarray=True)
             tm.assert_numpy_array_equal(out, expected)
 
@@ -1462,7 +1462,7 @@ def test_as_recarray(self):
                 FutureWarning, check_stacklevel=False):
             data = 'a,b\n1,a\n2,b'
             expected = np.array([(1, 'a'), (2, 'b')],
-                                dtype=[('a', '<i8'), ('b', 'O')])
+                                dtype=[('a', '=i8'), ('b', 'O')])
             out = self.read_csv(StringIO(data), as_recarray=True, index_col=0)
             tm.assert_numpy_array_equal(out, expected)
 
@@ -1471,7 +1471,7 @@ def test_as_recarray(self):
                 FutureWarning, check_stacklevel=False):
             data = '1,a\n2,b'
             expected = np.array([(1, 'a'), (2, 'b')],
-                                dtype=[('a', '<i8'), ('b', 'O')])
+                                dtype=[('a', '=i8'), ('b', 'O')])
             out = self.read_csv(StringIO(data), names=['a', 'b'],
                                 header=None, as_recarray=True)
             tm.assert_numpy_array_equal(out, expected)
@@ -1482,15 +1482,15 @@ def test_as_recarray(self):
                 FutureWarning, check_stacklevel=False):
             data = 'b,a\n1,a\n2,b'
             expected = np.array([(1, 'a'), (2, 'b')],
-                                dtype=[('b', '<i8'), ('a', 'O')])
+                                dtype=[('b', '=i8'), ('a', 'O')])
             out = self.read_csv(StringIO(data), as_recarray=True)
             tm.assert_numpy_array_equal(out, expected)
 
         # overrides the squeeze parameter
         with tm.assert_produces_warning(
                 FutureWarning, check_stacklevel=False):
             data = 'a\n1'
-            expected = np.array([(1,)], dtype=[('a', '<i8')])
+            expected = np.array([(1,)], dtype=[('a', '=i8')])
             out = self.read_csv(StringIO(data), as_recarray=True, squeeze=True)
             tm.assert_numpy_array_equal(out, expected)
 
@@ -1500,7 +1500,7 @@ def test_as_recarray(self):
             data = 'a,b\n1,a\n2,b'
             conv = lambda x: int(x) + 1
             expected = np.array([(2, 'a'), (3, 'b')],
-                                dtype=[('a', '<i8'), ('b', 'O')])
+                                dtype=[('a', '=i8'), ('b', 'O')])
             out = self.read_csv(StringIO(data), as_recarray=True,
                                 converters={'a': conv})
             tm.assert_numpy_array_equal(out, expected)
@@ -1509,7 +1509,7 @@ def test_as_recarray(self):
         with tm.assert_produces_warning(
                 FutureWarning, check_stacklevel=False):
             data = 'a,b\n1,a\n2,b'
-            expected = np.array([(1,), (2,)], dtype=[('a', '<i8')])
+            expected = np.array([(1,), (2,)], dtype=[('a', '=i8')])
             out = self.read_csv(StringIO(data), as_recarray=True,
                                 usecols=['a'])
             tm.assert_numpy_array_equal(out, expected)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -8,7 +8,7 @@
 
 from pandas import (DataFrame, date_range, period_range, MultiIndex, Index,
                     CategoricalIndex, compat)
-from pandas.core.common import PerformanceWarning
+from pandas.core.common import PerformanceWarning, UnsortedIndexError
 from pandas.indexes.base import InvalidIndexError
 from pandas.compat import range, lrange, u, PY3, long, lzip
 
@@ -2535,3 +2535,19 @@ def test_dropna(self):
         msg = "invalid how option: xxx"
         with tm.assertRaisesRegexp(ValueError, msg):
             idx.dropna(how='xxx')
+
+    def test_unsortedindex(self):
+        # GH 11897
+        mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
+                                        ('x', 'b'), ('y', 'a'), ('z', 'b')],
+                                       names=['one', 'two'])
+        df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
+                          columns=['one', 'two'])
+
+        with assertRaises(UnsortedIndexError):
+            df.loc(axis=0)['z', :]
+        df.sort_index(inplace=True)
+        self.assertEqual(len(df.loc(axis=0)['z', :]), 2)
+
+        with assertRaises(KeyError):
+            df.loc(axis=0)['q', :]
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -23,7 +23,7 @@
                              MultiIndex, Timestamp, Timedelta)
 from pandas.formats.printing import pprint_thing
 from pandas import concat
-from pandas.core.common import PerformanceWarning
+from pandas.core.common import PerformanceWarning, UnsortedIndexError
 
 import pandas.util.testing as tm
 from pandas import date_range
@@ -2230,7 +2230,7 @@ def f():
         df = df.sortlevel(level=1, axis=0)
         self.assertEqual(df.index.lexsort_depth, 0)
         with tm.assertRaisesRegexp(
-                KeyError,
+                UnsortedIndexError,
                 'MultiIndex Slicing requires the index to be fully '
                 r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
             df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
@@ -2417,7 +2417,7 @@ def test_per_axis_per_level_doc_examples(self):
         def f():
             df.loc['A1', (slice(None), 'foo')]
 
-        self.assertRaises(KeyError, f)
+        self.assertRaises(UnsortedIndexError, f)
         df = df.sortlevel(axis=1)
 
         # slicing
@@ -3480,8 +3480,12 @@ def test_iloc_mask(self):
             ('index', '.loc'): '0b11',
             ('index', '.iloc'): ('iLocation based boolean indexing '
                                  'cannot use an indexable as a mask'),
-            ('locs', ''): 'Unalignable boolean Series key provided',
-            ('locs', '.loc'): 'Unalignable boolean Series key provided',
+            ('locs', ''): 'Unalignable boolean Series provided as indexer '
+                          '(index of the boolean Series and of the indexed '
+                          'object do not match',
+            ('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
+                              '(index of the boolean Series and of the '
+                              'indexed object do not match',
             ('locs', '.iloc'): ('iLocation based boolean indexing on an '
                                 'integer type is not available'),
         }
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
@@ -16,13 +16,11 @@
 from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works,
                                           _ok_for_gaussian_kde)
 
-
 """ Test cases for misc plot functions """
 
 
 @tm.mplskip
 class TestSeriesPlots(TestPlotBase):
-
     def setUp(self):
         TestPlotBase.setUp(self)
         import matplotlib as mpl
@@ -54,7 +52,6 @@ def test_bootstrap_plot(self):
 
 @tm.mplskip
 class TestDataFramePlots(TestPlotBase):
-
     @slow
     def test_scatter_plot_legacy(self):
         tm._skip_if_no_scipy()
@@ -277,6 +274,32 @@ def test_radviz(self):
         handles, labels = ax.get_legend_handles_labels()
         self._check_colors(handles, facecolors=colors)
 
+    @slow
+    def test_subplot_titles(self):
+        df = self.iris.drop('Name', axis=1).head()
+        # Use the column names as the subplot titles
+        title = list(df.columns)
+
+        # Case len(title) == len(df.columns)
+        plot = df.plot(subplots=True, title=title)
+        self.assertEqual([p.get_title() for p in plot], title)
+
+        # Case len(title) > len(df.columns)
+        self.assertRaises(ValueError, df.plot, subplots=True,
+                          title=title + ["kittens > puppies"])
+
+        # Case len(title) < len(df.columns)
+        self.assertRaises(ValueError, df.plot, subplots=True, title=title[:2])
+
+        # Case subplots=False and title is of type list
+        self.assertRaises(ValueError, df.plot, subplots=False, title=title)
+
+        # Case df with 3 numeric columns but layout of (2,2)
+        plot = df.drop('SepalWidth', axis=1).plot(subplots=True, layout=(2, 2),
+                                                  title=title[:-1])
+        title_list = [ax.get_title() for sublist in plot for ax in sublist]
+        self.assertEqual(title_list, title[:3] + [''])
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
@@ -326,7 +326,7 @@ def test_strftime(self):
         period_index = period_range('20150301', periods=5)
         result = period_index.strftime("%Y/%m/%d")
         expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03',
-                             '2015/03/04', '2015/03/05'], dtype='<U10')
+                             '2015/03/04', '2015/03/05'], dtype='=U10')
         self.assert_numpy_array_equal(result, expected)
 
         s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14,
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py