
Fix irregular Timestamp arithmetic types #6543 #6544

Closed
wants to merge 9 commits
17 changes: 16 additions & 1 deletion doc/source/release.rst
@@ -107,6 +107,10 @@ API Changes
or numbering columns as needed (:issue:`2385`)
- Slicing and advanced/boolean indexing operations on ``Index`` classes will no
longer change type of the resulting index (:issue:`6440`).
- ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`).
- Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`):
- ``df.iloc[:-len(df)]`` is now empty
- ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse
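The two corner cases above can be sketched against a small frame (assuming current pandas behavior):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(6).reshape(3, 2), columns=['a', 'b'])

# a stop of exactly -len(df) now degenerates cleanly to an empty frame
empty = df.iloc[:-len(df)]

# a start beyond the last position is clamped, so a negative step
# walks the whole frame in reverse
rev = df.iloc[len(df)::-1]
```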

Experimental Features
~~~~~~~~~~~~~~~~~~~~~
@@ -139,6 +143,7 @@ Improvements to existing features
Bug Fixes
~~~~~~~~~

- Bug in ``Series`` construction raising ``ValueError`` when the index doesn't match the data (:issue:`6532`)
- Bug in ``pd.DataFrame.sort_index`` where mergesort wasn't stable when ``ascending=False`` (:issue:`6399`)
- Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeroes (:issue:`6391`)
- Bug in version string generation for dev versions with shallow clones / install from tarball (:issue:`6127`)
@@ -180,7 +185,7 @@ Bug Fixes
- Bug in :meth:`DataFrame.replace` where nested dicts were erroneously
depending on the order of dictionary keys and values (:issue:`5338`).
- Perf issue in concatting with empty objects (:issue:`3259`)
- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:isssue:`6444`)
- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:issue:`6444`)
- Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`)
- Bug in ``str.extract`` when passed a non-default index (:issue:`6348`)
- Bug in ``str.split`` when passed ``pat=None`` and ``n=1`` (:issue:`6466`)
@@ -194,6 +199,16 @@ Bug Fixes
- Bug in ``read_html`` tests where redirected invalid URLs would make one test
fail (:issue:`6445`).
- Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`)
- Bug that caused ``_ref_locs`` corruption when slice indexing across the columns axis of a DataFrame (:issue:`6525`)
- Regression from 0.13 in the treatment of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`)
- ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`).
- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`)
- Bug in setitem with loc on mixed integer Indexes (:issue:`6546`)
- Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`)
- Bug in ``DataFrame.to_stata`` that led to data loss in certain cases, and could export data using the
  wrong data types and missing values (:issue:`6335`)
- Inconsistent types in Timestamp addition/subtraction (:issue:`6543`)
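The Timestamp fix (the subject of this PR) is about arithmetic handing back consistent pandas types. A minimal check, assuming current pandas semantics:

```python
import datetime
import pandas as pd

ts = pd.Timestamp('2014-03-05')

# adding/subtracting an offset should hand back a Timestamp,
# not a bare datetime.datetime
assert isinstance(ts + datetime.timedelta(days=1), pd.Timestamp)
assert isinstance(ts - datetime.timedelta(days=1), pd.Timestamp)
```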


pandas 0.13.1
-------------
46 changes: 46 additions & 0 deletions doc/source/v0.14.0.txt
@@ -92,6 +92,49 @@ These are out-of-bounds selections
.. ipython:: python

i[[0,1,2]].astype(np.int_)
- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example,
the old behavior returned an Index in this case (:issue:`6459`):

.. ipython:: python
:suppress:

from itertools import product
tuples = list(product(('a', 'b'), ('c', 'd')))
mi = MultiIndex.from_tuples(tuples)
df_multi = DataFrame(np.random.randn(4, 2), index=mi)
tuple_ind = pd.Index(tuples)

.. ipython:: python

df_multi.index

@suppress
df_multi.index = tuple_ind

# Old behavior: cast MultiIndex to an Index
df_multi.set_index(df_multi.index)

@suppress
df_multi.index = mi

# New behavior
df_multi.set_index(df_multi.index)

This also applies when passing multiple indices to ``set_index``:

.. ipython:: python

@suppress
df_multi.index = tuple_ind

# Old output, 2-level MultiIndex of tuples
df_multi.set_index([df_multi.index, df_multi.index])

@suppress
df_multi.index = mi

# New output, 4-level MultiIndex
df_multi.set_index([df_multi.index, df_multi.index])
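The new behavior above can also be verified programmatically; a minimal sketch against a current pandas install:

```python
import numpy as np
import pandas as pd

mi = pd.MultiIndex.from_tuples([('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd')])
df = pd.DataFrame(np.random.randn(4, 2), index=mi)

# set_index with a MultiIndex now preserves every level instead of
# collapsing to a flat Index of tuples
result = df.set_index(df.index)
assert result.index.nlevels == 2

# passing the MultiIndex twice yields a 4-level MultiIndex
stacked = df.set_index([df.index, df.index])
assert stacked.index.nlevels == 4
```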


MultiIndexing Using Slicers
@@ -248,6 +291,9 @@ Enhancements
using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`)
- Added a ``to_julian_date`` function to ``Timestamp`` and ``DatetimeIndex``
to convert to the Julian Date used primarily in astronomy. (:issue:`4041`)
- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types
and will upcast when needed. When it isn't possible to losslessly upcast, a warning
is raised (:issue:`6327`)
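The upcast rule can be sketched roughly as follows; `stata_safe_dtype` is a hypothetical helper for illustration, not the actual implementation. The Stata ``.dta`` format has no 64-bit integer type, so ``int64`` columns must either fit in ``int32`` or be upcast to ``float64`` (potentially lossy above 2**53, hence the warning):

```python
import numpy as np

def stata_safe_dtype(values):
    # hypothetical sketch of the compatibility check in to_stata
    if values.dtype == np.int64:
        # downcast if the data fits in Stata's widest integer type
        if (values.min() >= np.iinfo(np.int32).min
                and values.max() <= np.iinfo(np.int32).max):
            return np.int32
        # otherwise upcast to float64, which may lose precision
        return np.float64
    return values.dtype
```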

Performance
~~~~~~~~~~~
9 changes: 3 additions & 6 deletions pandas/core/common.py
@@ -124,7 +124,7 @@ def isnull(obj):

See also
--------
pandas.notnull: boolean inverse of pandas.isnull
pandas.notnull: boolean inverse of pandas.isnull
"""
return _isnull(obj)

@@ -272,7 +272,7 @@ def notnull(obj):
isnulled : array-like of bool or bool
Array or bool indicating whether an object is *not* null or if an array
is given which of the element is *not* null.

See also
--------
pandas.isnull : boolean inverse of pandas.notnull
@@ -1727,10 +1727,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
dtype = value.dtype

if dtype.kind == 'M' and dtype != _NS_DTYPE:
try:
value = tslib.array_to_datetime(value)
except:
raise
value = value.astype(_NS_DTYPE)

elif dtype.kind == 'm' and dtype != _TD_DTYPE:
from pandas.tseries.timedeltas import \
15 changes: 9 additions & 6 deletions pandas/core/frame.py
@@ -1867,11 +1867,6 @@ def eval(self, expr, **kwargs):
kwargs['resolvers'] = kwargs.get('resolvers', ()) + resolvers
return _eval(expr, **kwargs)

def _slice(self, slobj, axis=0, raise_on_error=False, typ=None):
axis = self._get_block_manager_axis(axis)
new_data = self._data.get_slice(
slobj, axis=axis, raise_on_error=raise_on_error)
return self._constructor(new_data)

def _box_item_values(self, key, values):
items = self.columns[self.columns.get_loc(key)]
@@ -2240,7 +2235,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False,

to_remove = []
for col in keys:
if isinstance(col, Series):
if isinstance(col, MultiIndex):
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col.get_level_values(n))

level = col.get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, (Series, Index)):
level = col.values
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
10 changes: 10 additions & 0 deletions pandas/core/generic.py
@@ -1079,6 +1079,16 @@ def _clear_item_cache(self, i=None):
else:
self._item_cache.clear()

def _slice(self, slobj, axis=0, typ=None):
"""
Construct a slice of this container.

typ parameter is maintained for compatibility with Series slicing.

"""
axis = self._get_block_manager_axis(axis)
return self._constructor(self._data.get_slice(slobj, axis=axis))

def _set_item(self, key, value):
self._data.set(key, value)
self._clear_item_cache()
32 changes: 30 additions & 2 deletions pandas/core/index.py
@@ -555,6 +555,29 @@ def _convert_list_indexer(self, key, typ=None):
""" convert a list indexer. these should be locations """
return key

def _convert_list_indexer_for_mixed(self, keyarr, typ=None):
""" passed a key that is tuplesafe that is integer based
and we have a mixed index (e.g. number/labels). figure out
the indexer. return None if we can't help
"""
if com.is_integer_dtype(keyarr) and not self.is_floating():
if self.inferred_type != 'integer':
keyarr = np.where(keyarr < 0,
len(self) + keyarr, keyarr)

if self.inferred_type == 'mixed-integer':
indexer = self.get_indexer(keyarr)
if (indexer >= 0).all():
return indexer

from pandas.core.indexing import _maybe_convert_indices
return _maybe_convert_indices(indexer, len(self))

elif not self.inferred_type == 'integer':
return keyarr

return None
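The negative-position wrap-around in the helper above boils down to a single `np.where`; for illustration:

```python
import numpy as np

keyarr = np.array([0, -1, 2])
n = 5  # length of the index

# map negative positions to their positive equivalents,
# exactly as the helper does before building the indexer
wrapped = np.where(keyarr < 0, n + keyarr, keyarr)
# wrapped is [0, 4, 2]
```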

def _convert_indexer_error(self, key, msg=None):
if msg is None:
msg = 'label'
@@ -987,8 +1010,13 @@ def intersection(self, other):
except TypeError:
pass

indexer = self.get_indexer(other.values)
indexer = indexer.take((indexer != -1).nonzero()[0])
try:
indexer = self.get_indexer(other.values)
indexer = indexer.take((indexer != -1).nonzero()[0])
except:
# duplicates
indexer = self.get_indexer_non_unique(other.values)[0].unique()

return self.take(indexer)

def diff(self, other):
73 changes: 13 additions & 60 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
@@ -91,32 +91,8 @@ def _get_label(self, label, axis=0):
def _get_loc(self, key, axis=0):
return self.obj._ixs(key, axis=axis)

def _slice(self, obj, axis=0, raise_on_error=False, typ=None):

# make out-of-bounds into bounds of the object
if typ == 'iloc':
ax = self.obj._get_axis(axis)
l = len(ax)
start = obj.start
stop = obj.stop
step = obj.step
if start is not None:
# degenerate to return nothing
if start >= l:
return self._getitem_axis(tuple(),axis=axis)

# equiv to a null slice
elif start <= -l:
start = None
if stop is not None:
if stop > l:
stop = None
elif stop <= -l:
stop = None
obj = slice(start,stop,step)

return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error,
typ=typ)
def _slice(self, obj, axis=0, typ=None):
return self.obj._slice(obj, axis=axis, typ=typ)

def __setitem__(self, key, value):

@@ -441,7 +417,9 @@ def can_do_equal_len():
# align to
if item in value:
v = value[item]
v = v.reindex(self.obj[item].index & v.index)
i = self.obj[item].index
v = v.reindex(i & v.index)

setter(item, v.values)
else:
setter(item, np.nan)
@@ -909,20 +887,10 @@ def _reindex(keys, level=None):
# asarray can be unsafe, NumPy strings are weird
keyarr = _asarray_tuplesafe(key)

if is_integer_dtype(keyarr) and not labels.is_floating():
if labels.inferred_type != 'integer':
keyarr = np.where(keyarr < 0,
len(labels) + keyarr, keyarr)

if labels.inferred_type == 'mixed-integer':
indexer = labels.get_indexer(keyarr)
if (indexer >= 0).all():
self.obj.take(indexer, axis=axis, convert=True)
else:
return self.obj.take(keyarr, axis=axis)
elif not labels.inferred_type == 'integer':

return self.obj.take(keyarr, axis=axis)
# handle a mixed integer scenario
indexer = labels._convert_list_indexer_for_mixed(keyarr, typ=self.name)
if indexer is not None:
return self.obj.take(indexer, axis=axis)

# this is not the most robust, but...
if (isinstance(labels, MultiIndex) and
@@ -1062,11 +1030,9 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
objarr = _asarray_tuplesafe(obj)

# If have integer labels, defer to label-based indexing
if is_integer_dtype(objarr) and not is_int_index:
if labels.inferred_type != 'integer':
objarr = np.where(objarr < 0,
len(labels) + objarr, objarr)
return objarr
indexer = labels._convert_list_indexer_for_mixed(objarr, typ=self.name)
if indexer is not None:
return indexer

# this is not the most robust, but...
if (isinstance(labels, MultiIndex) and
@@ -1353,8 +1319,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
return obj

if isinstance(slice_obj, slice):
return self._slice(slice_obj, axis=axis, raise_on_error=True,
typ='iloc')
return self._slice(slice_obj, axis=axis, typ='iloc')
else:
return self.obj.take(slice_obj, axis=axis, convert=False)

@@ -1657,18 +1622,6 @@ def _need_slice(obj):
(obj.step is not None and obj.step != 1))


def _check_slice_bounds(slobj, values):
l = len(values)
start = slobj.start
if start is not None:
if start < -l or start > l - 1:
raise IndexError("out-of-bounds on slice (start)")
stop = slobj.stop
if stop is not None:
if stop < -l - 1 or stop > l:
raise IndexError("out-of-bounds on slice (end)")


def _maybe_droplevels(index, key):
# drop levels
original_index = index