pandas-dev · jreback · Nov 29, 2015 · Nov 1, 2015
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -112,3 +112,7 @@ Bug Fixes
 - Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
 - Bug groupby on tz-aware data where selection not returning ``Timestamp`` (:issue:`11616`)
 - Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)
+
+
+- Bug in ``.loc`` where the result for a duplicated key could have an ``Index`` with incorrect dtype (:issue:`11497`)
+
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -111,6 +111,10 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
     _is_numeric_dtype = False
     _can_hold_na = True
 
+    # prioritize current class for _shallow_copy_with_infer,
+    # used to infer integers as datetime-likes
+    _infer_as_myclass = False
+
     _engine_type = _index.ObjectEngine
 
     def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
@@ -209,6 +213,24 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
             subarr = com._asarray_tuplesafe(data, dtype=object)
             return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
 
+    """
+    NOTE for new Index creation:
+
+    - _simple_new: It returns a new Index with the same type as the caller.
+      All metadata (such as name) must be provided by the caller.
+      Using _shallow_copy is recommended because it fills in this metadata unless otherwise specified.
+
+    - _shallow_copy: It returns a new Index with the same type (using _simple_new),
+      but fills in the caller's metadata unless otherwise specified. Passed kwargs
+      will overwrite the corresponding metadata.
+
+    - _shallow_copy_with_infer: It returns a new Index, inferring its type
+      from the passed values. It fills in the caller's metadata unless otherwise
+      specified, the same as _shallow_copy.
+
+    See each method's docstring.
+    """
+
     @classmethod
     def _simple_new(cls, values, name=None, dtype=None, **kwargs):
         """
@@ -233,6 +255,48 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):
         result._reset_identity()
         return result
 
+    def _shallow_copy(self, values=None, **kwargs):
+        """
+        create a new Index with the same class as the caller, don't copy the data,
+        use the same object attributes with passed in attributes taking precedence
+
+        *this is an internal non-public method*
+
+        Parameters
+        ----------
+        values : the values to create the new Index, optional (default: self.values)
+        kwargs : override the attributes taken from self._get_attributes_dict()
+        """
+        if values is None:
+            values = self.values
+        attributes = self._get_attributes_dict()
+        attributes.update(kwargs)  # passed-in attributes take precedence
+        return self._simple_new(values, **attributes)
+
+    def _shallow_copy_with_infer(self, values=None, **kwargs):
+        """
+        create a new Index, inferring its class from the passed values, don't copy the data,
+        use the same object attributes with passed in attributes taking precedence
+
+        *this is an internal non-public method*
+
+        Parameters
+        ----------
+        values : the values to create the new Index, optional (default: self.values)
+        kwargs : override the attributes taken from self._get_attributes_dict()
+        """
+        if values is None:
+            values = self.values
+        attributes = self._get_attributes_dict()
+        attributes.update(kwargs)
+        attributes['copy'] = False  # set after update(), so it overrides any passed copy=
+        if self._infer_as_myclass:  # class opted in: try the caller's own class first
+            try:
+                return self._constructor(values, **attributes)
+            except (TypeError, ValueError) as e:
+                pass  # own-class construction failed; fall through to Index(...) below
+        return Index(values, **attributes)
+
     def _update_inplace(self, result, **kwargs):
         # guard when called from IndexOpsMixin
         raise TypeError("Index can't be updated inplace")
@@ -372,31 +436,6 @@ def view(self, cls=None):
             result._id = self._id
         return result
 
-    def _shallow_copy(self, values=None, infer=False, **kwargs):
-        """
-        create a new Index, don't copy the data, use the same object attributes
-        with passed in attributes taking precedence
-
-        *this is an internal non-public method*
-
-        Parameters
-        ----------
-        values : the values to create the new Index, optional
-        infer : boolean, default False
-            if True, infer the new type of the passed values
-        kwargs : updates the default attributes for this Index
-        """
-        if values is None:
-            values = self.values
-        attributes = self._get_attributes_dict()
-        attributes.update(kwargs)
-
-        if infer:
-            attributes['copy'] = False
-            return Index(values, **attributes)
-
-        return self.__class__._simple_new(values,**attributes)
-
     def _coerce_scalar_to_index(self, item):
         """
         we need to coerce a scalar to a compat for our index type
@@ -1206,7 +1245,7 @@ def append(self, other):
         to_concat, name = self._ensure_compat_append(other)
         attribs = self._get_attributes_dict()
         attribs['name'] = name
-        return self._shallow_copy(np.concatenate(to_concat), infer=True, **attribs)
+        return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)
 
     @staticmethod
     def _ensure_compat_concat(indexes):
@@ -1725,7 +1764,7 @@ def sym_diff(self, other, result_name=None):
         attribs['name'] = result_name
         if 'freq' in attribs:
             attribs['freq'] = None
-        return self._shallow_copy(the_diff, infer=True, **attribs)
+        return self._shallow_copy_with_infer(the_diff, **attribs)
 
     def get_loc(self, key, method=None, tolerance=None):
         """
@@ -2199,7 +2238,8 @@ def _reindex_non_unique(self, target):
                 new_indexer = np.arange(len(self.take(indexer)))
                 new_indexer[~check] = -1
 
-        return self._shallow_copy(new_labels), indexer, new_indexer
+        new_index = self._shallow_copy_with_infer(new_labels, freq=None)
+        return new_index, indexer, new_indexer
 
     def join(self, other, how='left', level=None, return_indexers=False):
         """
@@ -2756,8 +2796,7 @@ def delete(self, loc):
         -------
         new_index : Index
         """
-        attribs = self._get_attributes_dict()
-        return self._shallow_copy(np.delete(self._data, loc), **attribs)
+        return self._shallow_copy(np.delete(self._data, loc))
 
     def insert(self, loc, item):
         """
@@ -2778,8 +2817,7 @@ def insert(self, loc, item):
 
         idx = np.concatenate(
             (_self[:loc], item, _self[loc:]))
-        attribs = self._get_attributes_dict()
-        return self._shallow_copy(idx, infer=True, **attribs)
+        return self._shallow_copy_with_infer(idx)
 
     def drop(self, labels, errors='raise'):
         """
@@ -2841,7 +2879,6 @@ def fillna(self, value=None, downcast=None):
                 # no need to care metadata other than name
                 # because it can't have freq if
                 return Index(result, name=self.name)
-
         return self._shallow_copy()
 
     def _evaluate_with_timedelta_like(self, other, op, opstr):
@@ -4316,10 +4353,15 @@ def view(self, cls=None):
         result._id = self._id
         return result
 
-    def _shallow_copy(self, values=None, infer=False, **kwargs):
+    def _shallow_copy_with_infer(self, values=None, **kwargs):
+        return self._shallow_copy(values, **kwargs)  # no separate inference: delegate as-is
+
+    def _shallow_copy(self, values=None, **kwargs):
         if values is not None:
             if 'name' in kwargs:
                 kwargs['names'] = kwargs.pop('name',None)
+            # discards freq
+            kwargs.pop('freq', None)
             return MultiIndex.from_tuples(values, **kwargs)
         return self.view()
 

diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -3516,44 +3516,163 @@ def test_series_partial_set(self):
         # Regression from GH4825
         ser = Series([0.1, 0.2], index=[1, 2])
 
-        # ToDo: check_index_type can be True after GH 11497
-
         # loc
         expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
         result = ser.loc[[3, 2, 3]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
+
+        expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x'])
+        result = ser.loc[[3, 2, 3, 'x']]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
+        result = ser.loc[[2, 2, 1]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1])
+        result = ser.loc[[2, 2, 'x', 1]]
+        assert_series_equal(result, expected, check_index_type=True)
 
         # raises as nothing in in the index
         self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]])
 
         expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
         result = ser.loc[[2, 2, 3]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
 
         expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
         result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
 
         expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
         result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[5, 3, 3]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
 
         expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
         result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[5, 4, 4]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
 
         expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
         result = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]).loc[[7, 2, 2]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
 
         expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
         result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[4, 5, 5]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
 
         # iloc
         expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
         result = ser.iloc[[1, 1, 0, 0]]
-        assert_series_equal(result, expected, check_index_type=False)
+        assert_series_equal(result, expected, check_index_type=True)
+
+    def test_series_partial_set_with_name(self):
+        # GH 11497: .loc with duplicated keys must keep the index dtype and name
+
+        idx = Index([1, 2], dtype='int64', name='idx')
+        ser = Series([0.1, 0.2], index=idx, name='s')
+
+        # loc
+        exp_idx = Index([3, 2, 3], dtype='int64', name='idx')
+        expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s')
+        result = ser.loc[[3, 2, 3]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx')
+        expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name='s')
+        result = ser.loc[[3, 2, 3, 'x']]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([2, 2, 1], dtype='int64', name='idx')
+        expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s')
+        result = ser.loc[[2, 2, 1]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx')
+        expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s')
+        result = ser.loc[[2, 2, 'x', 1]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        # raises as none of the keys are in the index
+        self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]])
+
+        exp_idx = Index([2, 2, 3], dtype='int64', name='idx')
+        expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s')
+        result = ser.loc[[2, 2, 3]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([3, 4, 4], dtype='int64', name='idx')
+        expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s')
+        idx = Index([1, 2, 3], dtype='int64', name='idx')
+        result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([5, 3, 3], dtype='int64', name='idx')
+        expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s')
+        idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
+        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[5, 3, 3]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([5, 4, 4], dtype='int64', name='idx')
+        expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s')
+        idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
+        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[5, 4, 4]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([7, 2, 2], dtype='int64', name='idx')
+        expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s')
+        idx = Index([4, 5, 6, 7], dtype='int64', name='idx')
+        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[7, 2, 2]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        exp_idx = Index([4, 5, 5], dtype='int64', name='idx')
+        expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s')
+        idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
+        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[4, 5, 5]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+        # iloc
+        exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx')
+        expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s')
+        result = ser.iloc[[1,1,0,0]]
+        assert_series_equal(result, expected, check_index_type=True)
+
+    def test_series_partial_set_datetime(self):
+        # GH 11497: .loc result must keep a named DatetimeIndex for duplicated / missing keys
+
+        idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
+        ser = Series([0.1, 0.2], index=idx, name='s')
+
+        result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]]
+        exp = Series([0.1, 0.2], index=idx, name='s')
+        assert_series_equal(result, exp, check_index_type=True)
+
+        keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), Timestamp('2011-01-01')]
+        exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), name='s')
+        assert_series_equal(ser.loc[keys], exp, check_index_type=True)
+
+        keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), Timestamp('2011-01-03')]
+        exp = Series([np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name='idx'), name='s')
+        assert_series_equal(ser.loc[keys], exp, check_index_type=True)
+
+    def test_series_partial_set_period(self):
+        # GH 11497: .loc result must keep a named PeriodIndex for duplicated / missing keys
+
+        idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx')
+        ser = Series([0.1, 0.2], index=idx, name='s')
+
+        result = ser.loc[[pd.Period('2011-01-01', freq='D'), pd.Period('2011-01-02', freq='D')]]
+        exp = Series([0.1, 0.2], index=idx, name='s')
+        assert_series_equal(result, exp, check_index_type=True)
+
+        keys = [pd.Period('2011-01-02', freq='D'), pd.Period('2011-01-02', freq='D'),
+                pd.Period('2011-01-01', freq='D')]
+        exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), name='s')
+        assert_series_equal(ser.loc[keys], exp, check_index_type=True)
+
+        keys = [pd.Period('2011-01-03', freq='D'), pd.Period('2011-01-02', freq='D'),
+                pd.Period('2011-01-03', freq='D')]
+        exp = Series([np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name='idx'), name='s')
+        assert_series_equal(ser.loc[keys], exp, check_index_type=True)
 
     def test_partial_set_invalid(self):
 

diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
@@ -197,7 +197,7 @@ def _join_i8_wrapper(joinf, **kwargs):
                          'is_quarter_start','is_quarter_end','is_year_start','is_year_end',
                          'tz','freq']
     _is_numeric_dtype = False
-
+    _infer_as_myclass = True
 
     @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
                      mapping={True: 'infer', False: 'raise'})
@@ -778,7 +778,7 @@ def astype(self, dtype):
         elif dtype == _NS_DTYPE and self.tz is not None:
             return self.tz_convert('UTC').tz_localize(None)
         elif dtype == str:
-            return self._shallow_copy(values=self.format(), infer=True)
+            return Index(self.format(), name=self.name, dtype=object)
         else:  # pragma: no cover
             raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)