pandas-dev · jorisvandenbossche · Sep 28, 2018 · Oct 18, 2018 · Oct 18, 2018 · Oct 19, 2018
diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py
@@ -39,7 +39,8 @@ def _check(cls, inst):
                                         "float64index", "uint64index",
                                         "multiindex", "datetimeindex",
                                         "timedeltaindex", "periodindex",
-                                        "categoricalindex", "intervalindex"))
+                                        "categoricalindex", "intervalindex",
+                                        "extensionindex"))
 
 ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", ))
 ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", ))

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -317,19 +317,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
             else:
                 return result
 
-        # extension dtype
-        elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
-            data = np.asarray(data)
-            if not (dtype is None or is_object_dtype(dtype)):
-
-                # coerce to the provided dtype
-                data = dtype.construct_array_type()._from_sequence(
-                    data, dtype=dtype, copy=False)
-
-            # coerce to the object dtype
-            data = data.astype(object)
-            return Index(data, dtype=object, copy=copy, name=name,
-                         **kwargs)
+        elif (is_extension_array_dtype(data)
+                or is_extension_array_dtype(dtype)):
+            if dtype is not None and is_object_dtype(dtype):
+                data = np.asarray(data)
+                return Index(data, dtype=object, copy=copy, name=name,
+                             **kwargs)
+            from pandas.core.indexes.extension import ExtensionIndex
+            return ExtensionIndex(data, dtype=dtype, name=name)
 
         # index-like
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
@@ -2412,6 +2407,8 @@ def to_native_types(self, slicer=None, **kwargs):
             values = values[slicer]
         return values._format_native_types(**kwargs)
 
+    # TODO(EA) potentially overwrite for better implementation
+    # or use _formatting_values
     def _format_native_types(self, na_rep='', quoting=None, **kwargs):
         """ actually format my specific types """
         mask = isna(self)

diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -0,0 +1,172 @@
+import numpy as np
+from pandas._libs import index as libindex
+
+# from pandas._libs import (lib, index as libindex, tslibs,
+#                           algos as libalgos, join as libjoin,
+#                           Timedelta)
+
+from pandas.compat.numpy import function as nv
+
+from pandas.core.arrays import ExtensionArray
+from pandas.core.dtypes.common import (
+    pandas_dtype,
+    ensure_platform_int,
+    is_dtype_equal,
+    is_integer_dtype,
+    is_float_dtype,
+    is_extension_array_dtype)
+from pandas.core.dtypes.generic import (
+    ABCSeries, ABCIndexClass
+)
+from pandas.util._decorators import (
+    Appender, cache_readonly)
+
+from .base import Index
+
+
+# _index_doc_kwargs = dict(ibase._index_doc_kwargs)
+# _index_doc_kwargs.update(
+#     dict(klass='IntervalIndex',
+#          target_klass='IntervalIndex or list of Intervals',
+#          name=textwrap.dedent("""\
+#          name : object, optional
+#               to be stored in the index.
+#          """),
+#          ))
+
+
+class ExtensionIndex(Index):
+    """
+    Index class that holds an ExtensionArray.
+
+    """
+    _typ = 'extensionindex'
+    _comparables = ['name']
+    _attributes = ['name']
+
+    _can_hold_na = True
+
+    @property
+    def _is_numeric_dtype(self):
+        return self.dtype._is_numeric
+
+    # TODO
+    # # would we like our indexing holder to defer to us
+    # _defer_to_indexing = False
+
+    # # prioritize current class for _shallow_copy_with_infer,
+    # # used to infer integers as datetime-likes
+    # _infer_as_myclass = False
+
+    def __new__(cls, *args, **kwargs):
+        return object.__new__(cls)
+
+    def __init__(self, data, dtype=None, name=None, copy=False, **kwargs):
+        # needs to accept and ignore kwargs eg for freq passed in
+        # Index._shallow_copy_with_infer
+
+        if name is None and hasattr(data, 'name'):
+            name = data.name
+
+        # unbox containers that can contain ExtensionArray
+        if isinstance(data, (ABCSeries, ABCIndexClass)):
+            data = data._values
+
+        # check dtype and coerce data to dtype if needed
+        if dtype is not None:
+            dtype = pandas_dtype(dtype)
+            if not is_extension_array_dtype(dtype):
+                raise ValueError(
+                    "The passed dtype should be an ExtensionDtype")
+            if not is_dtype_equal(getattr(data, 'dtype', None), dtype):
+                data = dtype.construct_array_type()._from_sequence(
+                    data, dtype=dtype, copy=False)
+
+        if not isinstance(data, ExtensionArray):
+            raise ValueError("passed data should be an ExtensionArray, or the "
+                             "passed dtype should be an ExtensionDtype")
+
+        if copy:
+            data = data.copy()
+
+        self._data = data
+        self.name = name
+
+    def __len__(self):
+        """
+        return the length of the Index
+        """
+        return len(self._data)
+
+    @property
+    def size(self):
+        # EA does not have .size
+        return len(self._data)
+
+    def __array__(self, dtype=None):
+        """ the array interface, return my values """
+        return np.array(self._data)
+
+    @cache_readonly
+    def dtype(self):
+        """ return the dtype object of the underlying data """
+        return self._values.dtype
+
+    @cache_readonly
+    def dtype_str(self):
+        """ return the dtype str of the underlying data """
+        return str(self.dtype)
+
+    @property
+    def _values(self):
+        return self._data
+
+    @property
+    def values(self):
+        """ return the underlying data as an ndarray """
+        return self._values
+
+    @cache_readonly
+    def _isnan(self):
+        """ return if each value is nan"""
+        return self._values.isna()
+
+    @cache_readonly
+    def _engine_type(self):
+        values, na_value = self._values._values_for_factorize()
+        if is_integer_dtype(values):
+            return libindex.Int64Engine
+        elif is_float_dtype(values):
+            return libindex.Float64Engine
+        # TODO add more
+        else:
+            return libindex.ObjectEngine
+
+    @cache_readonly
+    def _engine(self):
+        # property, for now, slow to look up
+        values, na_value = self._values._values_for_factorize()
+        return self._engine_type(lambda: values, len(self))
+
+    def _format_with_header(self, header, **kwargs):
+        return header + list(self._format_native_types(**kwargs))
+
+    @Appender(Index.take.__doc__)
+    def take(self, indices, axis=0, allow_fill=True, fill_value=None,
+             **kwargs):
+        if kwargs:
+            nv.validate_take(tuple(), kwargs)
+        indices = ensure_platform_int(indices)
+
+        result = self._data.take(indices, allow_fill=allow_fill,
+                                 fill_value=fill_value)
+        attributes = self._get_attributes_dict()
+        return self._simple_new(result, **attributes)
+
+    def __getitem__(self, value):
+        result = self._data[value]
+        if isinstance(result, self._data.__class__):
+            return self._shallow_copy(result)
+        else:
+            # scalar
+            return result
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
@@ -45,6 +45,8 @@ class TestMyDtype(BaseDtypeTests):
 from .dtype import BaseDtypeTests  # noqa
 from .getitem import BaseGetitemTests  # noqa
 from .groupby import BaseGroupbyTests  # noqa
+from .index import BaseIndexTests  # noqa
+
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
 from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil  # noqa

diff --git a/pandas/tests/extension/base/index.py b/pandas/tests/extension/base/index.py
@@ -0,0 +1,96 @@
+import pytest
+import numpy as np
+
+import pandas as pd
+import pandas.util.testing as tm
+from pandas.core.indexes.extension import ExtensionIndex
+
+from .base import BaseExtensionTests
+
+
+class BaseIndexTests(BaseExtensionTests):
+    """Tests for ExtensionIndex."""
+
+    def test_constructor(self, data):
+        result = ExtensionIndex(data, name='test')
+        assert result.name == 'test'
+        self.assert_extension_array_equal(data, result._values)
+
+    def test_series_constructor(self, data):
+        result = pd.Series(range(len(data)), index=data)
+        assert isinstance(result.index, ExtensionIndex)
+
+    def test_asarray(self, data):
+        idx = ExtensionIndex(data)
+        tm.assert_numpy_array_equal(np.array(idx), np.array(data))
+
+    def test_repr(self, data):
+        idx = ExtensionIndex(data, name='test')
+        repr(idx)
+        s = pd.Series(range(len(data)), index=data)
+        repr(s)
+
+    def test_indexing_scalar(self, data):
+        s = pd.Series(range(len(data)), index=data)
+        label = data[1]
+        assert s[label] == 1
+        assert s.iloc[1] == 1
+        assert s.loc[label] == 1
+
+    def test_indexing_list(self, data):
+        s = pd.Series(range(len(data)), index=data)
+        labels = [data[1], data[3]]
+        exp = pd.Series([1, 3], index=data[[1, 3]])
+        self.assert_series_equal(s[labels], exp)
+        self.assert_series_equal(s.loc[labels], exp)
+        self.assert_series_equal(s.iloc[[1, 3]], exp)
+
+    def test_contains(self, data_missing, data_for_sorting, na_value):
+        idx = ExtensionIndex(data_missing)
+        assert data_missing[0] in idx
+        assert data_missing[1] in idx
+        assert na_value in idx
+        assert '__random' not in idx
+        idx = ExtensionIndex(data_for_sorting)
+        assert na_value not in idx
+
+    def test_na(self, data_missing):
+        idx = ExtensionIndex(data_missing)
+        result = idx.isna()
+        expected = np.array([True, False], dtype=bool)
+        tm.assert_numpy_array_equal(result, expected)
+        result = idx.notna()
+        tm.assert_numpy_array_equal(result, ~expected)
+        assert idx.hasnans #is True
+
+    def test_monotonic(self, data_for_sorting):
+        data = data_for_sorting
+        idx = ExtensionIndex(data)
+        assert idx.is_monotonic_increasing is False
+        assert idx.is_monotonic_decreasing is False
+
+        idx = ExtensionIndex(data[[2, 0, 1]])
+        assert idx.is_monotonic_increasing is True
+        assert idx.is_monotonic_decreasing is False
+
+        idx = ExtensionIndex(data[[1, 0, 2]])
+        assert idx.is_monotonic_increasing is False
+        assert idx.is_monotonic_decreasing is True
+
+    def test_is_unique(self, data_for_sorting, data_for_grouping):
+        idx = ExtensionIndex(data_for_sorting)
+        assert idx.is_unique is True
+
+        idx = ExtensionIndex(data_for_grouping)
+        assert idx.is_unique is False
+
+    def test_take(self, data):
+        idx = ExtensionIndex(data)
+        expected = ExtensionIndex(data.take([0, 2, 3]))
+        result = idx.take([0, 2, 3])
+        tm.assert_index_equal(result, expected)
+
+    def test_getitem(self, data):
+        idx = ExtensionIndex(data)
+        assert idx[0] == data[0]
+        tm.assert_index_equal(idx[[0, 1]], ExtensionIndex(data[[0, 1]]))
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -304,6 +304,10 @@ def test_compare_array(self, data, all_compare_operators):
         self._compare_other(s, data, op_name, other)
 
 
+class TestIndex(base.BaseIndexTests):
+    pass
+
+
 class DecimalArrayWithoutFromSequence(DecimalArray):
     """Helper class for testing error handling in _from_sequence."""
     def _from_sequence(cls, scalars, dtype=None, copy=False):

diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
@@ -216,3 +216,7 @@ class TestNumericReduce(base.BaseNumericReduceTests):
 
 class TestBooleanReduce(base.BaseBooleanReduceTests):
     pass
+
+
+class TestIndex(base.BaseIndexTests):
+    pass
-Original file line number
+Diff line change
@@ Expand Up / @@ -216,3 +216,7 @@ class TestNumericReduce(base.BaseNumericReduceTests): @@
     class TestBooleanReduce(base.BaseBooleanReduceTests):
         pass
+    class TestIndex(base.BaseIndexTests):
+        pass