pandas-dev · toobaz · Feb 15, 2016 · May 16, 2016 · May 17, 2016 · May 17, 2016
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -345,7 +345,8 @@ Bug Fixes
 - Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`)
 
 
-
+- Bug in various index types, which did not propagate the name of the passed index (:issue:`12309`)
+- Bug in ``DatetimeIndex``, which did not honour ``copy=True`` (:issue:`13205`)
 - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
 
 

diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
@@ -376,6 +376,33 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
                 pass
         return Index(values, **attributes)
 
+    def _deepcopy_if_needed(self, orig, copy=False):
+        """
+        .. versionadded:: 0.18.2
+
+        Make a copy of self if data coincides (in memory) with orig.
+        Subclasses should override this if self._data is not an ndarray.
+
+        Parameters
+        ----------
+        orig : ndarray or other object
+            original input to compare self._data against
+        copy : boolean, default False
+            when False, do not run any check, just return self
+
+        Returns
+        -------
+        A copy of self if needed, otherwise self : Index
+        """
+        if copy:
+            # Base objects = original memory allocations (absent on e.g. lists)
+            orig = orig if getattr(orig, 'base', None) is None else orig.base
+            new = self._data if self._data.base is None else self._data.base
+            if orig is new:
+                return self.copy(deep=True)
+
+        return self
+
     def _update_inplace(self, result, **kwargs):
         # guard when called from IndexOpsMixin
         raise TypeError("Index can't be updated inplace")

diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py
@@ -46,6 +46,9 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
         if fastpath:
             return cls._simple_new(data, name=name)
 
+        if name is None and hasattr(data, 'name'):
+            name = data.name
+
         if isinstance(data, com.ABCCategorical):
             data = cls._create_categorical(cls, data, categories, ordered)
         elif isinstance(data, CategoricalIndex):

diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py
@@ -22,6 +22,28 @@ class NumericIndex(Index):
     """
     _is_numeric_dtype = True
 
+    def __new__(cls, data=None, dtype=None, copy=False, name=None,
+                fastpath=False):
+        """Build the index, casting data to ``cls._default_dtype`` if needed."""
+        if fastpath:
+            return cls._simple_new(data, name=name)
+
+        # scalars and generators are handled in _coerce_to_ndarray
+        data = cls._coerce_to_ndarray(data)
+
+        if issubclass(data.dtype.type, compat.string_types):
+            cls._string_data_error(data)
+
+        if copy or not com.is_dtype_equal(data.dtype, cls._default_dtype):
+            subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
+            cls._assert_safe_casting(data, subarr)
+        else:
+            subarr = data
+
+        if name is None and hasattr(data, 'name'):
+            name = data.name
+        return cls._simple_new(subarr, name=name)
+
     def _maybe_cast_slice_bound(self, label, side, kind):
         """
         This function should be overloaded in subclasses that allow non-trivial
@@ -55,6 +77,15 @@ def _convert_tolerance(self, tolerance):
             raise ValueError('tolerance argument for %s must be numeric: %r' %
                              (type(self).__name__, tolerance))
 
+    @classmethod
+    def _assert_safe_casting(cls, data, subarr):
+        """
+        Hook run by ``__new__`` after casting ``data`` into ``subarr``.
+        Subclasses need to override this only if casting to the internal
+        dtype(s) bears the risk of truncation (e.g. float to int).
+        """
+        pass
+
 
 class Int64Index(NumericIndex):
     """
@@ -90,29 +121,7 @@ class Int64Index(NumericIndex):
 
     _engine_type = _index.Int64Engine
 
-    def __new__(cls, data=None, dtype=None, copy=False, name=None,
-                fastpath=False, **kwargs):
-
-        if fastpath:
-            return cls._simple_new(data, name=name)
-
-        # isscalar, generators handled in coerce_to_ndarray
-        data = cls._coerce_to_ndarray(data)
-
-        if issubclass(data.dtype.type, compat.string_types):
-            cls._string_data_error(data)
-
-        elif issubclass(data.dtype.type, np.integer):
-            dtype = np.int64
-            subarr = np.array(data, dtype=dtype, copy=copy)
-        else:
-            subarr = np.array(data, dtype=np.int64, copy=copy)
-            if len(data) > 0:
-                if (subarr != data).any():
-                    raise TypeError('Unsafe NumPy casting to integer, you must'
-                                    ' explicitly cast')
-
-        return cls._simple_new(subarr, name=name)
+    _default_dtype = np.int64
 
     @property
     def inferred_type(self):
@@ -155,17 +164,22 @@ def equals(self, other):
         if self.is_(other):
             return True
 
-        try:
-            return com.array_equivalent(com._values_from_object(self),
-                                        com._values_from_object(other))
-        except TypeError:
-            # e.g. fails in numpy 1.6 with DatetimeIndex #1681
-            return False
+        return com.array_equivalent(com._values_from_object(self),
+                                    com._values_from_object(other))
 
     def _wrap_joined_index(self, joined, other):
         name = self.name if self.name == other.name else None
         return Int64Index(joined, name=name)
 
+    @classmethod
+    def _assert_safe_casting(cls, data, subarr):
+        """
+        Ensure incoming data can be represented as ints, else raise TypeError.
+        """
+        if not issubclass(data.dtype.type, np.integer):
+            if not np.array_equal(data, subarr):
+                raise TypeError('Unsafe NumPy casting, you must '
+                                'explicitly cast')
 
 Int64Index._add_numeric_methods()
 Int64Index._add_logical_methods()
@@ -200,39 +214,7 @@ class Float64Index(NumericIndex):
     _inner_indexer = _algos.inner_join_indexer_float64
     _outer_indexer = _algos.outer_join_indexer_float64
 
-    def __new__(cls, data=None, dtype=None, copy=False, name=None,
-                fastpath=False, **kwargs):
-
-        if fastpath:
-            return cls._simple_new(data, name)
-
-        data = cls._coerce_to_ndarray(data)
-
-        if issubclass(data.dtype.type, compat.string_types):
-            cls._string_data_error(data)
-
-        if dtype is None:
-            dtype = np.float64
-        dtype = np.dtype(dtype)
-
-        # allow integer / object dtypes to be passed, but coerce to float64
-        if dtype.kind in ['i', 'O', 'f']:
-            dtype = np.float64
-
-        else:
-            raise TypeError("cannot support {0} dtype in "
-                            "Float64Index".format(dtype))
-
-        try:
-            subarr = np.array(data, dtype=dtype, copy=copy)
-        except:
-            raise TypeError('Unsafe NumPy casting, you must explicitly cast')
-
-        # coerce to float64 for storage
-        if subarr.dtype != np.float64:
-            subarr = subarr.astype(np.float64)
-
-        return cls._simple_new(subarr, name)
+    _default_dtype = np.float64
 
     @property
     def inferred_type(self):
@@ -339,8 +321,7 @@ def equals(self, other):
                 return False
             left, right = self._values, other._values
             return ((left == right) | (self._isnan & other._isnan)).all()
-        except TypeError:
-            # e.g. fails in numpy 1.6 with DatetimeIndex #1681
+        except (TypeError, ValueError):
             return False
 
     def __contains__(self, other):
@@ -392,6 +373,5 @@ def isin(self, values, level=None):
         return lib.ismember_nans(np.array(self), value_set,
                                  isnull(list(value_set)).any())
 
-
 Float64Index._add_numeric_methods()
 Float64Index._add_logical_methods_disabled()
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -372,11 +372,13 @@ def test_consolidate_datetime64(self):
         ser_starting.index = ser_starting.values
         ser_starting = ser_starting.tz_localize('US/Eastern')
         ser_starting = ser_starting.tz_convert('UTC')
+        ser_starting.index.name = 'starting'
 
         ser_ending = df.ending
         ser_ending.index = ser_ending.values
         ser_ending = ser_ending.tz_localize('US/Eastern')
         ser_ending = ser_ending.tz_convert('UTC')
+        ser_ending.index.name = 'ending'
 
         df.starting = ser_starting.index
         df.ending = ser_ending.index

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -205,6 +205,53 @@ def test_hash_error(self):
                                        type(ind).__name__):
                 hash(ind)
 
+    def test_copy_name(self):
+        # Check that "name", passed explicitly or carried by data, is honoured
+        # GH12309
+        for name, index in compat.iteritems(self.indices):
+            if isinstance(index, MultiIndex):
+                continue
+
+            first = index.__class__(index, copy=True, name='mario')
+            second = first.__class__(first, copy=False)
+
+            # Even with "copy=False", construction must return a new object.
+            self.assertIsNot(first, second)
+            # Not using tm.assert_index_equal() since names differ:
+            self.assertTrue(index.equals(first))
+
+            self.assertEqual(first.name, 'mario')
+            self.assertEqual(second.name, 'mario')
+
+            s1 = Series(2, index=first)
+            s2 = Series(3, index=second[:-1])
+            if not isinstance(index, CategoricalIndex):  # See GH13365
+                s3 = s1 * s2
+                self.assertEqual(s3.index.name, 'mario')
+
+    def test_ensure_copied_data(self):
+        # Check that the "copy" argument of each Index.__new__ is honoured
+        # GH12309
+        for name, index in compat.iteritems(self.indices):
+            init_kwargs = {}
+            if isinstance(index, PeriodIndex):
+                # Needs "freq" specification:
+                init_kwargs['freq'] = index.freq
+            elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
+                # RangeIndex cannot be initialized from data
+                # MultiIndex and CategoricalIndex are tested separately
+                continue
+
+            index_type = index.__class__
+            result = index_type(index.values, copy=True, **init_kwargs)
+            tm.assert_index_equal(index, result)
+            tm.assert_numpy_array_equal(index.values, result.values,
+                                        check_same='copy')
+
+            result = index_type(index.values, copy=False, **init_kwargs)
+            tm.assert_numpy_array_equal(index.values, result.values,
+                                        check_same='same')
+
     def test_copy_and_deepcopy(self):
         from copy import copy, deepcopy
 

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -172,6 +172,7 @@ def test_constructor_from_series(self):
         df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
                       '5-1-1990']
         result = DatetimeIndex(df['date'], freq='MS')
+        expected.name = 'date'
         self.assert_index_equal(result, expected)
         self.assertEqual(df['date'].dtype, object)
 

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -507,6 +507,20 @@ def test_identical(self):
         self.assertTrue(ci1.identical(ci1.copy()))
         self.assertFalse(ci1.identical(ci2))
 
+    def test_ensure_copied_data(self):
+        # Check the "copy" argument of each Index.__new__ is honoured
+        # GH12309
+        # Tested separately from other indexes: .values is not an ndarray
+        def _base(ar):
+            return ar if ar.base is None else ar.base
+        for index in self.indices.values():
+            result = CategoricalIndex(index.values, copy=True)
+            tm.assert_index_equal(index, result)
+            self.assertIsNot(_base(index.values), _base(result.values))
+
+            result = CategoricalIndex(index.values, copy=False)
+            self.assertIs(_base(index.values), _base(result.values))
+
     def test_equals(self):
 
         ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)

diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -169,8 +169,8 @@ def test_constructor(self):
         # explicit construction
         index = Float64Index([1, 2, 3, 4, 5])
         self.assertIsInstance(index, Float64Index)
-        self.assertTrue((index.values == np.array(
-            [1, 2, 3, 4, 5], dtype='float64')).all())
+        expected = np.array([1, 2, 3, 4, 5], dtype='float64')
+        self.assert_numpy_array_equal(index.values, expected)
         index = Float64Index(np.array([1, 2, 3, 4, 5]))
         self.assertIsInstance(index, Float64Index)
         index = Float64Index([1., 2, 3, 4, 5])

diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
@@ -315,6 +315,17 @@ def test_numpy_array_equal_object_message(self):
         with assertRaisesRegexp(AssertionError, expected):
             assert_almost_equal(a, b)
 
+    def test_numpy_array_equal_copy_flag(self):
+        a = np.array([1, 2, 3])
+        b = a.copy()
+        c = a.view()
+        expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)'
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_numpy_array_equal(a, b, check_same='same')
+        expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)'
+        with assertRaisesRegexp(AssertionError, expected):
+            assert_numpy_array_equal(a, c, check_same='copy')
+
     def test_assert_almost_equal_iterable_message(self):
 
         expected = """Iterable are different

diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
@@ -225,6 +225,15 @@ def __new__(cls, data=None,
                 verify_integrity=True, normalize=False,
                 closed=None, ambiguous='raise', dtype=None, **kwargs):
 
+        # Keep a reference to the input so 'copy' can be honored later:
+        if isinstance(data, Index):
+            ref_to_data = data._data
+        else:
+            ref_to_data = data
+
+        if name is None and hasattr(data, 'name'):
+            name = data.name
+
         dayfirst = kwargs.pop('dayfirst', None)
         yearfirst = kwargs.pop('yearfirst', None)
 
@@ -302,7 +311,7 @@ def __new__(cls, data=None,
                             raise TypeError("Already tz-aware, use tz_convert "
                                             "to convert.")
 
-                    return data
+                    return data._deepcopy_if_needed(ref_to_data, copy)
 
         if issubclass(data.dtype.type, compat.string_types):
             data = tslib.parse_str_array_to_datetime(data, freq=freq,
@@ -335,10 +344,7 @@ def __new__(cls, data=None,
         elif data.dtype == _INT64_DTYPE:
             if isinstance(data, Int64Index):
                 raise TypeError('cannot convert Int64Index->DatetimeIndex')
-            if copy:
-                subarr = np.asarray(data, dtype=_NS_DTYPE)
-            else:
-                subarr = data.view(_NS_DTYPE)
+            subarr = data.view(_NS_DTYPE)
         else:
             if isinstance(data, (ABCSeries, Index)):
                 values = data._values
@@ -414,7 +420,7 @@ def __new__(cls, data=None,
             if inferred:
                 subarr.offset = to_offset(inferred)
 
-        return subarr
+        return subarr._deepcopy_if_needed(ref_to_data, copy)
 
     @classmethod
     def _generate(cls, start, end, periods, name, offset,