Merge commit 'v0.12.0rc1-90-g4c2d050' into debian

yarikoptic · yarikoptic · commit 4bc4e80a611f · 2013-07-16T12:17:54.000-04:00
* commit 'v0.12.0rc1-90-g4c2d050':
  DOC: add v0.13.0.txt release notes file (but not to the index yet)
  TST: no need for flavor testing in skip
  BUG: remove six import
  BLD: use mpl 1.1.1 in python 2.7 production travis build
  ENH: implement non-unique indexing in series (GH4246)
  DOC: Fix typos in CONTRIBUTING.md
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
 ###Guidelines
 
-All contributions, bug reports, bug fixes, documentation improvments,
+All contributions, bug reports, bug fixes, documentation improvements,
 enhancements and ideas are welcome.
 
 The Github "issues" tab contains some issues labels "Good as first PR", these are
@@ -34,7 +34,7 @@ your contribution or address the issue you're having.
     See the "Getting Travis-CI going" below.
   - We suggest you enable Travis-CI on your fork, to make it easier for the team
      to see that the PR does indeed pass all the tests.
-  - Back-compatiblitiy **really** matters. Pandas already has a large user-base and
+  - Back-compatibility **really** matters. Pandas already has a large user-base and
     a lot of existing user code. Don't break old code if you can avoid it
     Explain the need if there is one in the PR.
     Changes to method signatures should be made in a way which doesn't break existing
@@ -113,7 +113,7 @@ page for any PR you submit. For example:
 
 See the Green "Good to merge!" banner? that's it.
 
-This is especially important for new contributors, as memebers of the pandas dev team
+This is especially important for new contributors, as members of the pandas dev team
 like to know the test suite passes before considering it for merging.
 Even regular contributors who test religiously on their local box (using tox
 for example) often rely on a PR+travis=green to make double sure everything
diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
@@ -6,7 +6,7 @@ cython==0.19.1
 bottleneck==0.6.0
 numexpr==2.1
 tables==2.3.1
-matplotlib==1.2.1
+matplotlib==1.1.1
 openpyxl==1.6.2
 xlrd==0.9.2
 patsy==0.1.0
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -235,6 +235,7 @@ pandas 0.12
       names (:issue:`3873`)
     - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to
       ``reindex`` for location-based taking
+    - Allow non-unique indexing in series via ``.ix/.loc`` and ``__getitem__`` (:issue:`4246`)
 
   - Fixed bug in groupby with empty series referencing a variable before assignment. (:issue:`3510`)
   - Allow index name to be used in groupby for non MultiIndex (:issue:`4014`)
diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt
@@ -437,6 +437,7 @@ Bug Fixes
       names (:issue:`3873`)
     - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to
       ``reindex`` for location-based taking
+    - Allow non-unique indexing in series via ``.ix/.loc`` and ``__getitem__`` (:issue:`4246`)
 
   - ``DataFrame.from_records`` did not accept empty recarrays (:issue:`3682`)
   - ``read_html`` now correctly skips tests (:issue:`3741`)
@@ -462,7 +463,7 @@ Bug Fixes
     (:issue:`4089`)
   - Fixed bug in ``DataFrame.replace`` where a nested dict wasn't being
     iterated over when regex=False (:issue:`4115`)
-  - Fixed bug in the parsing of microseconds when using the ``format`` 
+  - Fixed bug in the parsing of microseconds when using the ``format``
     argument in ``to_datetime`` (:issue:`4152`)
   - Fixed bug in ``PandasAutoDateLocator`` where ``invert_xaxis`` triggered
     incorrectly ``MilliSecondLocator``  (:issue:`3990`)
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -0,0 +1,20 @@
+.. _whatsnew_0130:
+
+v0.13.0 (August ??, 2013)
+-------------------------
+
+This is a major release from 0.12.0 and includes several new features and
+enhancements along with a large number of bug fixes.
+
+API changes
+~~~~~~~~~~~
+
+Enhancements
+~~~~~~~~~~~~
+
+Bug Fixes
+~~~~~~~~~
+
+See the :ref:`full release notes
+<release>` or issue tracker
+on GitHub for a complete list.
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -928,7 +928,7 @@ def reindex(self, target, method=None, level=None, limit=None,
                     if method is not None or limit is not None:
                         raise ValueError("cannot reindex a non-unique index "
                                          "with a method or limit")
-                    indexer, _ = self.get_indexer_non_unique(target)
+                    indexer, missing = self.get_indexer_non_unique(target)
 
         return target, indexer
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -481,12 +481,12 @@ def _reindex(keys, level=None):
                     new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values
                     new_indexer[missing_indexer] = -1
 
-                    # need to reindex with an indexer on a specific axis
-                    from pandas.core.frame import DataFrame
-                    if not (type(self.obj) == DataFrame):
-                        raise NotImplementedError("cannot handle non-unique indexing for non-DataFrame (yet)")
+                    # reindex with the specified axis
+                    ndim = self.obj.ndim
+                    if axis+1 > ndim:
+                        raise AssertionError("invalid indexing error with non-unique index")
 
-                    args = [None] * 4
+                    args = [None] * (2*ndim)
                     args[2*axis] = new_labels
                     args[2*axis+1] = new_indexer
 
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -5,8 +5,6 @@
 
 import numpy as np
 
-import six
-
 from pandas.core.series import Series
 from pandas.core.frame import DataFrame
 
@@ -691,7 +689,7 @@ def melt(frame, id_vars=None, value_vars=None,
         else:
             var_name = [frame.columns.name if frame.columns.name is not None
                         else 'variable']
-    if isinstance(var_name, six.string_types):
+    if isinstance(var_name, basestring):
         var_name = [var_name]
 
     N, K = frame.shape
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -681,6 +681,10 @@ def _get_with(self, key):
                 return self._get_values(key)
             else:
                 try:
+                    # handle the dup indexing case (GH 4246)
+                    if isinstance(key, (list,tuple)):
+                        return self.ix[key]
+
                     return self.reindex(key)
                 except Exception:
                     # [slice(0, 5, None)] will break if you convert to ndarray,
@@ -2637,8 +2641,13 @@ def reindex(self, index=None, method=None, level=None, fill_value=pa.NA,
         new_index, indexer = self.index.reindex(index, method=method,
                                                 level=level, limit=limit,
                                                 takeable=takeable)
+
+        # GH4246 (dispatch to a common method with frame to handle possibly duplicate index)
+        return self._reindex_with_indexers(new_index, indexer, copy=copy, fill_value=fill_value)
+
+    def _reindex_with_indexers(self, index, indexer, copy, fill_value):
         new_values = com.take_1d(self.values, indexer, fill_value=fill_value)
-        return Series(new_values, index=new_index, name=self.name)
+        return Series(new_values, index=index, name=self.name)
 
     def reindex_axis(self, labels, axis=0, **kwargs):
         """ for compatibility with higher dims """
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -43,9 +43,17 @@ def _skip_if_no(module_name):
 def _skip_if_none_of(module_names):
     if isinstance(module_names, basestring):
         _skip_if_no(module_names)
+        if module_names == 'bs4':
+            import bs4
+            if bs4.__version__ == LooseVersion('4.2.0'):
+                raise nose.SkipTest
     else:
         if not all(_have_module(module_name) for module_name in module_names):
             raise nose.SkipTest
+        if 'bs4' in module_names:
+            import bs4
+            if bs4.__version__ == LooseVersion('4.2.0'):
+                raise nose.SkipTest
 
 
 DATA_PATH = get_data_path()
@@ -82,10 +90,6 @@ def run_read_html(self, *args, **kwargs):
 
     def try_skip(self):
         _skip_if_none_of(('bs4', 'html5lib'))
-        import bs4
-        if (bs4.__version__ == LooseVersion('4.2.0') and
-            self.flavor != ['lxml']):
-            raise nose.SkipTest
 
     def setup_data(self):
         self.spam_data = os.path.join(DATA_PATH, 'spam.html')
@@ -425,7 +429,8 @@ def try_skip(self):
     def test_spam_data_fail(self):
         from lxml.etree import XMLSyntaxError
         spam_data = os.path.join(DATA_PATH, 'spam.html')
-        self.assertRaises(XMLSyntaxError, self.run_read_html, spam_data, flavor=['lxml'])
+        self.assertRaises(XMLSyntaxError, self.run_read_html, spam_data,
+                          flavor=['lxml'])
 
     def test_banklist_data_fail(self):
         from lxml.etree import XMLSyntaxError
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -788,6 +788,15 @@ def test_getitem_unordered_dup(self):
         self.assert_(np.isscalar(obj['c']))
         self.assert_(obj['c'] == 0)
 
+    def test_getitem_dups_with_missing(self):
+
+        # breaks reindex, so need to use .ix internally
+        # GH 4246
+        s = Series([1,2,3,4],['foo','bar','foo','bah'])
+        expected = s.ix[['foo','bar','bah','bam']]
+        result = s[['foo','bar','bah','bam']]
+        assert_series_equal(result,expected)
+
     def test_setitem_ambiguous_keyerror(self):
         s = Series(range(10), index=range(0, 20, 2))
         self.assertRaises(KeyError, s.__setitem__, 1, 5)
@@ -1141,7 +1150,7 @@ def test_where(self):
         s = Series(np.arange(10))
         mask = s > 5
         self.assertRaises(ValueError, s.__setitem__, mask, ([0]*5,))
-        
+
     def test_where_broadcast(self):
         # Test a variety of differently sized series
         for size in range(2, 6):