diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 93aedce07da9d..eb0de57f6a9de 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -345,7 +345,8 @@ Bug Fixes - Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`) - +- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) +- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 82f16becbd511..96472698ba9d9 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -376,6 +376,33 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): pass return Index(values, **attributes) + def _deepcopy_if_needed(self, orig, copy=False): + """ + .. versionadded:: 0.18.2 + + Make a copy of self if data coincides (in memory) with orig. + Subclasses should override this if self._base is not an ndarray. + + Parameters + ---------- + orig : ndarray + other ndarray to compare self._data against + copy : boolean, default False + when False, do not run any check, just return self + + Returns + ------- + A copy of self if needed, otherwise self : Index + """ + if copy: + # Retrieve the "base objects", i.e. 
the original memory allocations + orig = orig if orig.base is None else orig.base + new = self._data if self._data.base is None else self._data.base + if orig is new: + return self.copy(deep=True) + + return self + def _update_inplace(self, result, **kwargs): # guard when called from IndexOpsMixin raise TypeError("Index can't be updated inplace") diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index e877e43bcc603..4c9ca43f7f25d 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -46,6 +46,9 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, if fastpath: return cls._simple_new(data, name=name) + if name is None and hasattr(data, 'name'): + name = data.name + if isinstance(data, com.ABCCategorical): data = cls._create_categorical(cls, data, categories, ordered) elif isinstance(data, CategoricalIndex): diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 0deaf4da9b2bb..0fda902b0519d 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -22,6 +22,28 @@ class NumericIndex(Index): """ _is_numeric_dtype = True + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False): + + if fastpath: + return cls._simple_new(data, name=name) + + # isscalar, generators handled in coerce_to_ndarray + data = cls._coerce_to_ndarray(data) + + if issubclass(data.dtype.type, compat.string_types): + cls._string_data_error(data) + + if copy or not com.is_dtype_equal(data.dtype, cls._default_dtype): + subarr = np.array(data, dtype=cls._default_dtype, copy=copy) + cls._assert_safe_casting(data, subarr) + else: + subarr = data + + if name is None and hasattr(data, 'name'): + name = data.name + return cls._simple_new(subarr, name=name) + def _maybe_cast_slice_bound(self, label, side, kind): """ This function should be overloaded in subclasses that allow non-trivial @@ -55,6 +77,15 @@ def _convert_tolerance(self, tolerance): raise ValueError('tolerance argument for %s 
must be numeric: %r' % (type(self).__name__, tolerance)) + @classmethod + def _assert_safe_casting(cls, data, subarr): + """ + Subclasses need to override this only if the process of casting data + from some accepted dtype to the internal dtype(s) bears the risk of + truncation (e.g. float to int). + """ + pass + class Int64Index(NumericIndex): """ @@ -90,29 +121,7 @@ class Int64Index(NumericIndex): _engine_type = _index.Int64Engine - def __new__(cls, data=None, dtype=None, copy=False, name=None, - fastpath=False, **kwargs): - - if fastpath: - return cls._simple_new(data, name=name) - - # isscalar, generators handled in coerce_to_ndarray - data = cls._coerce_to_ndarray(data) - - if issubclass(data.dtype.type, compat.string_types): - cls._string_data_error(data) - - elif issubclass(data.dtype.type, np.integer): - dtype = np.int64 - subarr = np.array(data, dtype=dtype, copy=copy) - else: - subarr = np.array(data, dtype=np.int64, copy=copy) - if len(data) > 0: - if (subarr != data).any(): - raise TypeError('Unsafe NumPy casting to integer, you must' - ' explicitly cast') - - return cls._simple_new(subarr, name=name) + _default_dtype = np.int64 @property def inferred_type(self): @@ -155,17 +164,22 @@ def equals(self, other): if self.is_(other): return True - try: - return com.array_equivalent(com._values_from_object(self), - com._values_from_object(other)) - except TypeError: - # e.g. fails in numpy 1.6 with DatetimeIndex #1681 - return False + return com.array_equivalent(com._values_from_object(self), + com._values_from_object(other)) def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None return Int64Index(joined, name=name) + @classmethod + def _assert_safe_casting(cls, data, subarr): + """ + Ensure incoming data can be represented as ints. 
+ """ + if not issubclass(data.dtype.type, np.integer): + if not np.array_equal(data, subarr): + raise TypeError('Unsafe NumPy casting, you must ' + 'explicitly cast') Int64Index._add_numeric_methods() Int64Index._add_logical_methods() @@ -200,39 +214,7 @@ class Float64Index(NumericIndex): _inner_indexer = _algos.inner_join_indexer_float64 _outer_indexer = _algos.outer_join_indexer_float64 - def __new__(cls, data=None, dtype=None, copy=False, name=None, - fastpath=False, **kwargs): - - if fastpath: - return cls._simple_new(data, name) - - data = cls._coerce_to_ndarray(data) - - if issubclass(data.dtype.type, compat.string_types): - cls._string_data_error(data) - - if dtype is None: - dtype = np.float64 - dtype = np.dtype(dtype) - - # allow integer / object dtypes to be passed, but coerce to float64 - if dtype.kind in ['i', 'O', 'f']: - dtype = np.float64 - - else: - raise TypeError("cannot support {0} dtype in " - "Float64Index".format(dtype)) - - try: - subarr = np.array(data, dtype=dtype, copy=copy) - except: - raise TypeError('Unsafe NumPy casting, you must explicitly cast') - - # coerce to float64 for storage - if subarr.dtype != np.float64: - subarr = subarr.astype(np.float64) - - return cls._simple_new(subarr, name) + _default_dtype = np.float64 @property def inferred_type(self): @@ -339,8 +321,7 @@ def equals(self, other): return False left, right = self._values, other._values return ((left == right) | (self._isnan & other._isnan)).all() - except TypeError: - # e.g. 
fails in numpy 1.6 with DatetimeIndex #1681 + except (TypeError, ValueError): return False def __contains__(self, other): @@ -392,6 +373,5 @@ def isin(self, values, level=None): return lib.ismember_nans(np.array(self), value_set, isnull(list(value_set)).any()) - Float64Index._add_numeric_methods() Float64Index._add_logical_methods_disabled() diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 0421cf2ba42d2..38163d89355e9 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -372,11 +372,13 @@ def test_consolidate_datetime64(self): ser_starting.index = ser_starting.values ser_starting = ser_starting.tz_localize('US/Eastern') ser_starting = ser_starting.tz_convert('UTC') + ser_starting.index.name = 'starting' ser_ending = df.ending ser_ending.index = ser_ending.values ser_ending = ser_ending.tz_localize('US/Eastern') ser_ending = ser_ending.tz_convert('UTC') + ser_ending.index.name = 'ending' df.starting = ser_starting.index df.ending = ser_ending.index diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e342eee2aabbb..d6f7493bb25f9 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -205,6 +205,53 @@ def test_hash_error(self): type(ind).__name__): hash(ind) + def test_copy_name(self): + # Check that "name" argument passed at initialization is honoured + # GH12309 + for name, index in compat.iteritems(self.indices): + if isinstance(index, MultiIndex): + continue + + first = index.__class__(index, copy=True, name='mario') + second = first.__class__(first, copy=False) + + # Even though "copy=False", we want a new object. 
+ self.assertIsNot(first, second) + # Not using tm.assert_index_equal() since names differ: + self.assertTrue(index.equals(first)) + + self.assertEqual(first.name, 'mario') + self.assertEqual(second.name, 'mario') + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + if not isinstance(index, CategoricalIndex): # See GH13365 + s3 = s1 * s2 + self.assertEqual(s3.index.name, 'mario') + + def test_ensure_copied_data(self): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + for name, index in compat.iteritems(self.indices): + init_kwargs = {} + if isinstance(index, PeriodIndex): + # Needs "freq" specification: + init_kwargs['freq'] = index.freq + elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): + # RangeIndex cannot be initialized from data + # MultiIndex and CategoricalIndex are tested separately + continue + + index_type = index.__class__ + result = index_type(index.values, copy=True, **init_kwargs) + tm.assert_index_equal(index, result) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='copy') + + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='same') + def test_copy_and_deepcopy(self): from copy import copy, deepcopy diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aa007c039f8ee..d535eaa238567 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -172,6 +172,7 @@ def test_constructor_from_series(self): df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'] result = DatetimeIndex(df['date'], freq='MS') + expected.name = 'date' self.assert_index_equal(result, expected) self.assertEqual(df['date'].dtype, object) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index c64b1e9fc4af8..e066842c33126 100644 --- a/pandas/tests/indexes/test_category.py +++ 
b/pandas/tests/indexes/test_category.py @@ -507,6 +507,20 @@ def test_identical(self): self.assertTrue(ci1.identical(ci1.copy())) self.assertFalse(ci1.identical(ci2)) + def test_ensure_copied_data(self): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + # Must be tested separately from other indexes because + # self.value is not an ndarray + _base = lambda ar : ar if ar.base is None else ar.base + for index in self.indices.values(): + result = CategoricalIndex(index.values, copy=True) + tm.assert_index_equal(index, result) + self.assertIsNot(_base(index.values), _base(result.values)) + + result = CategoricalIndex(index.values, copy=False) + self.assertIs(_base(index.values), _base(result.values)) + def test_equals(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 5eac0bc870756..90025fa014b78 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -169,8 +169,8 @@ def test_constructor(self): # explicit construction index = Float64Index([1, 2, 3, 4, 5]) self.assertIsInstance(index, Float64Index) - self.assertTrue((index.values == np.array( - [1, 2, 3, 4, 5], dtype='float64')).all()) + expected = np.array([1, 2, 3, 4, 5], dtype='float64') + self.assert_numpy_array_equal(index.values, expected) index = Float64Index(np.array([1, 2, 3, 4, 5])) self.assertIsInstance(index, Float64Index) index = Float64Index([1., 2, 3, 4, 5]) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index c4e864a909c03..c242213ee226f 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -315,6 +315,17 @@ def test_numpy_array_equal_object_message(self): with assertRaisesRegexp(AssertionError, expected): assert_almost_equal(a, b) + def test_numpy_array_equal_copy_flag(self): + a = np.array([1, 2, 3]) + b = a.copy() + c = a.view() + expected = 'array\(\[1, 2, 
3\]\) is not array\(\[1, 2, 3\]\)' + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(a, b, check_same='same') + expected = 'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)' + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(a, c, check_same='copy') + def test_assert_almost_equal_iterable_message(self): expected = """Iterable are different diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 83ab5d2a2bce4..af60a2d028c93 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -225,6 +225,15 @@ def __new__(cls, data=None, verify_integrity=True, normalize=False, closed=None, ambiguous='raise', dtype=None, **kwargs): + # This allows to later ensure that the 'copy' parameter is honored: + if isinstance(data, Index): + ref_to_data = data._data + else: + ref_to_data = data + + if name is None and hasattr(data, 'name'): + name = data.name + dayfirst = kwargs.pop('dayfirst', None) yearfirst = kwargs.pop('yearfirst', None) @@ -302,7 +311,7 @@ def __new__(cls, data=None, raise TypeError("Already tz-aware, use tz_convert " "to convert.") - return data + return data._deepcopy_if_needed(ref_to_data, copy) if issubclass(data.dtype.type, compat.string_types): data = tslib.parse_str_array_to_datetime(data, freq=freq, @@ -335,10 +344,7 @@ def __new__(cls, data=None, elif data.dtype == _INT64_DTYPE: if isinstance(data, Int64Index): raise TypeError('cannot convert Int64Index->DatetimeIndex') - if copy: - subarr = np.asarray(data, dtype=_NS_DTYPE) - else: - subarr = data.view(_NS_DTYPE) + subarr = data.view(_NS_DTYPE) else: if isinstance(data, (ABCSeries, Index)): values = data._values @@ -414,7 +420,7 @@ def __new__(cls, data=None, if inferred: subarr.offset = to_offset(inferred) - return subarr + return subarr._deepcopy_if_needed(ref_to_data, copy) @classmethod def _generate(cls, start, end, periods, name, offset, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 
c3deee5f6dab2..8a3ac1f080c90 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -182,6 +182,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, raise ValueError('Periods must be a number, got %s' % str(periods)) + if name is None and hasattr(data, 'name'): + name = data.name + if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) @@ -190,7 +193,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, freq, kwargs) else: ordinal, freq = cls._from_arraylike(data, freq, tz) - data = np.array(ordinal, dtype=np.int64, copy=False) + data = np.array(ordinal, dtype=np.int64, copy=copy) return cls._simple_new(data, name=name, freq=freq) diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 3e12cf14e7485..84f357481a28e 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -138,8 +138,9 @@ def __new__(cls, data=None, unit=None, if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: - data = data.copy() - return data + return data.copy() + else: + return data._shallow_copy() freq_infer = False if not isinstance(freq, DateOffset): diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 10276137b42a1..e515ba624d203 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1739,7 +1739,7 @@ def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: joined = index.join(index, how=kind) - self.assertIs(index, joined) + tm.assert_index_equal(index, joined) def test_factorize(self): idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 03ccfcab24f58..50af4db21f593 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -991,7 +991,7 @@ def raise_assert_detail(obj, message, left, right): def assert_numpy_array_equal(left, right, 
strict_nan=False, check_dtype=True, err_msg=None, - obj='numpy array'): + obj='numpy array', check_same=None): """ Checks that 'np.ndarray' is equivalent Parameters @@ -1007,6 +1007,8 @@ def assert_numpy_array_equal(left, right, strict_nan=False, obj : str, default 'numpy array' Specify object name being compared, internally used to show appropriate assertion message + check_same : None|'copy'|'same', default None + Ensure "left" and "right" refer/do not refer to the same memory area """ # instance validation @@ -1016,6 +1018,14 @@ def assert_numpy_array_equal(left, right, strict_nan=False, assertIsInstance(left, np.ndarray, '[ndarray] ') assertIsInstance(right, np.ndarray, '[ndarray] ') + def _get_base(obj): + return obj.base if getattr(obj, 'base', None) is not None else obj + + if check_same == 'same': + assertIs(_get_base(left), _get_base(right)) + elif check_same == 'copy': + assertIsNot(_get_base(left), _get_base(right)) + def _raise(left, right, err_msg): if err_msg is None: if left.shape != right.shape: