diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index c947758b8e8b1..f9888137d096b 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -401,4 +401,5 @@ Bug Fixes - Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`) - Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) +- Bug in `Index` subtypes (such as `PeriodIndex`) not returning their own type for `.drop` and `.insert` methods diff --git a/pandas/core/index.py b/pandas/core/index.py index e62fd646ede56..1866ea274e325 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -89,7 +89,6 @@ class Index(IndexOpsMixin, PandasObject): _left_indexer = _algos.left_join_indexer_object _inner_indexer = _algos.inner_join_indexer_object _outer_indexer = _algos.outer_join_indexer_object - _box_scalars = False _typ = 'index' @@ -204,6 +203,17 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, @classmethod def _simple_new(cls, values, name=None, **kwargs): + """ + we require the we have a dtype compat for the values + if we are passed a non-dtype compat, then coerce using the constructor + + Must be careful not to recurse. + """ + if not hasattr(values, 'dtype'): + values = np.array(values,copy=False) + if is_object_dtype(values): + values = cls(values, name=name, **kwargs).values + result = object.__new__(cls) result._data = values result.name = name @@ -341,15 +351,41 @@ def view(self, cls=None): result._id = self._id return result - def _shallow_copy(self, values=None, **kwargs): - """ create a new Index, don't copy the data, use the same object attributes - with passed in attributes taking precedence """ + def _shallow_copy(self, values=None, infer=False, **kwargs): + """ + create a new Index, don't copy the data, use the same object attributes + with passed in attributes taking precedence + + *this is an internal non-public method* + + Parameters + ---------- + values : the values to create the new Index, optional + infer : boolean, default False + if True, infer the new type of the passed values + kwargs : updates the default attributes for this Index + """ if values is None: values = self.values attributes = self._get_attributes_dict() attributes.update(kwargs) + + if infer: + attributes['copy'] = False + return Index(values, **attributes) + return self.__class__._simple_new(values,**attributes) + def _coerce_scalar_to_index(self, item): + """ + we need to coerce a scalar to a compat for our index type + + Parameters + ---------- + item : scalar item to coerce + """ + return Index([item], dtype=self.dtype, **self._get_attributes_dict()) + def copy(self, names=None, name=None, dtype=None, deep=False): """ Make a copy of this object. Name and dtype sets those attributes on @@ -1132,7 +1168,9 @@ def append(self, other): appended : Index """ to_concat, name = self._ensure_compat_append(other) - return Index(np.concatenate(to_concat), name=name) + attribs = self._get_attributes_dict() + attribs['name'] = name + return self._shallow_copy(np.concatenate(to_concat), infer=True, **attribs) @staticmethod def _ensure_compat_concat(indexes): @@ -1549,7 +1587,11 @@ def sym_diff(self, other, result_name=None): if result_name is None: result_name = result_name_update the_diff = sorted(set((self.difference(other)).union(other.difference(self)))) - return Index(the_diff, name=result_name) + attribs = self._get_attributes_dict() + attribs['name'] = result_name + if 'freq' in attribs: + attribs['freq'] = None + return self._shallow_copy(the_diff, infer=True, **attribs) def get_loc(self, key, method=None): """ @@ -2527,7 +2569,8 @@ def delete(self, loc): ------- new_index : Index """ - return Index(np.delete(self._data, loc), name=self.name) + attribs = self._get_attributes_dict() + return self._shallow_copy(np.delete(self._data, loc), **attribs) def insert(self, loc, item): """ @@ -2544,10 +2587,12 @@ def insert(self, loc, item): new_index : Index """ _self = np.asarray(self) - item_idx = Index([item], dtype=self.dtype).values + item = self._coerce_scalar_to_index(item).values + idx = np.concatenate( - (_self[:loc], item_idx, _self[loc:])) - return Index(idx, name=self.name) + (_self[:loc], item, _self[loc:])) + attribs = self._get_attributes_dict() + return self._shallow_copy(idx, infer=True, **attribs) def drop(self, labels, errors='raise'): """ @@ -3678,7 +3723,7 @@ class MultiIndex(Index): Level of sortedness (must be lexicographically sorted by that level) names : optional sequence of objects - Names for each of the index levels. + Names for each of the index levels. (name is accepted for compat) copy : boolean, default False Copy the meta-data verify_integrity : boolean, default True @@ -3694,8 +3739,11 @@ class MultiIndex(Index): rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, - copy=False, verify_integrity=True, _set_identity=True, **kwargs): + copy=False, verify_integrity=True, _set_identity=True, name=None, **kwargs): + # compat with Index + if name is not None: + names = name if levels is None or labels is None: raise TypeError("Must pass both levels and labels") if len(levels) != len(labels): @@ -4004,7 +4052,12 @@ def view(self, cls=None): result._id = self._id return result - _shallow_copy = view + def _shallow_copy(self, values=None, infer=False, **kwargs): + if values is not None: + if 'name' in kwargs: + kwargs['names'] = kwargs.pop('name',None) + return MultiIndex.from_tuples(values, **kwargs) + return self.view() @cache_readonly def dtype(self): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 0b592368e2a1c..7a35123960a06 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -396,6 +396,40 @@ def test_symmetric_diff(self): with tm.assertRaisesRegexp(TypeError, msg): result = first.sym_diff([1, 2, 3]) + def test_insert_base(self): + + for name, idx in compat.iteritems(self.indices): + result = idx[1:4] + + if not len(idx): + continue + + #test 0th element + self.assertTrue(idx[0:4].equals( + result.insert(0, idx[0]))) + + def test_delete_base(self): + + for name, idx in compat.iteritems(self.indices): + + if not len(idx): + continue + + expected = idx[1:] + result = idx.delete(0) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + expected = idx[:-1] + result = idx.delete(-1) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + with tm.assertRaises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 6cf972d4d7a8a..ec60edb6a78d6 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -5,7 +5,8 @@ import numpy as np from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE, _values_from_object, _maybe_box, - ABCSeries, is_integer, is_float) + ABCSeries, is_integer, is_float, + is_object_dtype, is_datetime64_dtype) from pandas.core.index import Index, Int64Index, Float64Index import pandas.compat as compat from pandas.compat import u @@ -494,9 +495,16 @@ def _local_timestamps(self): @classmethod def _simple_new(cls, values, name=None, freq=None, tz=None, **kwargs): + """ + we require the we have a dtype compat for the values + if we are passed a non-dtype compat, then coerce using the constructor + """ + if not getattr(values,'dtype',None): values = np.array(values,copy=False) - if values.dtype != _NS_DTYPE: + if is_object_dtype(values): + return cls(values, name=name, freq=freq, tz=tz, **kwargs).values + elif not is_datetime64_dtype(values): values = com._ensure_int64(values).view(_NS_DTYPE) result = object.__new__(cls) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 7606bd0bd86b8..242d9a7757556 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -19,7 +19,7 @@ import pandas.core.common as com from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, _values_from_object, ABCSeries, - is_integer, is_float) + is_integer, is_float, is_object_dtype) from pandas import compat from pandas.lib import Timestamp, Timedelta import pandas.lib as lib @@ -259,6 +259,11 @@ def _from_arraylike(cls, data, freq, tz): @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): + if not getattr(values,'dtype',None): + values = np.array(values,copy=False) + if is_object_dtype(values): + return PeriodIndex(values, name=name, freq=freq, **kwargs) + result = object.__new__(cls) result._data = values result.name = name @@ -266,6 +271,20 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result._reset_identity() return result + def _shallow_copy(self, values=None, infer=False, **kwargs): + """ we always want to return a PeriodIndex """ + return super(PeriodIndex, self)._shallow_copy(values=values, infer=False, **kwargs) + + def _coerce_scalar_to_index(self, item): + """ + we need to coerce a scalar to a compat for our index type + + Parameters + ---------- + item : scalar item to coerce + """ + return PeriodIndex([item], **self._get_attributes_dict()) + @property def _na_value(self): return self._box_func(tslib.iNaT)