diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/arrays/test_integer.py
similarity index 70%
rename from pandas/tests/extension/integer/test_integer.py
rename to pandas/tests/arrays/test_integer.py
index 3af127091d2d8..349a6aee5701e 100644
--- a/pandas/tests/extension/integer/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -1,11 +1,10 @@
+# -*- coding: utf-8 -*-
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
 import pytest
 
-from pandas.tests.extension import base
-from pandas.api.types import (
-    is_integer, is_scalar, is_float, is_float_dtype)
+from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar
 from pandas.core.dtypes.generic import ABCIndexClass
 
 from pandas.core.arrays import (
@@ -14,6 +13,8 @@
     Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
     UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
 
+from pandas.tests.extension.base import BaseOpsUtil
+
 
 def make_data():
     return (list(range(8)) +
@@ -39,42 +40,13 @@ def data_missing(dtype):
     return integer_array([np.nan, 1], dtype=dtype)
 
 
-@pytest.fixture
-def data_repeated(data):
-    def gen(count):
-        for _ in range(count):
-            yield data
-    yield gen
-
-
-@pytest.fixture
-def data_for_sorting(dtype):
-    return integer_array([1, 2, 0], dtype=dtype)
-
-
-@pytest.fixture
-def data_missing_for_sorting(dtype):
-    return integer_array([1, np.nan, 0], dtype=dtype)
-
-
-@pytest.fixture
-def na_cmp():
-    # we are np.nan
-    return lambda x, y: np.isnan(x) and np.isnan(y)
-
-
-@pytest.fixture
-def na_value():
-    return np.nan
-
-
-@pytest.fixture
-def data_for_grouping(dtype):
-    b = 1
-    a = 0
-    c = 2
-    na = np.nan
-    return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)
+@pytest.fixture(params=['data', 'data_missing'])
+def all_data(request, data, data_missing):
+    """Parametrized fixture giving 'data' and 'data_missing'"""
+    if request.param == 'data':
+        return data
+    elif request.param == 'data_missing':
+        return data_missing
 
 
 def test_dtypes(dtype):
@@ -87,61 +59,50 @@ def test_dtypes(dtype):
     assert dtype.name is not None
 
 
-class BaseInteger(object):
-
-    def assert_index_equal(self, left, right, *args, **kwargs):
-
-        left_na = left.isna()
-        right_na = right.isna()
+class TestInterface(object):
 
-        tm.assert_numpy_array_equal(left_na, right_na)
-        return tm.assert_index_equal(left[~left_na],
-                                     right[~right_na],
-                                     *args, **kwargs)
-
-    def assert_series_equal(self, left, right, *args, **kwargs):
+    def test_repr_array(self, data):
+        result = repr(data)
 
-        left_na = left.isna()
-        right_na = right.isna()
+        # not long
+        assert '...' not in result
 
-        tm.assert_series_equal(left_na, right_na)
-        return tm.assert_series_equal(left[~left_na],
-                                      right[~right_na],
-                                      *args, **kwargs)
+        assert 'dtype=' in result
+        assert 'IntegerArray' in result
 
-    def assert_frame_equal(self, left, right, *args, **kwargs):
-        # TODO(EA): select_dtypes
-        tm.assert_index_equal(
-            left.columns, right.columns,
-            exact=kwargs.get('check_column_type', 'equiv'),
-            check_names=kwargs.get('check_names', True),
-            check_exact=kwargs.get('check_exact', False),
-            check_categorical=kwargs.get('check_categorical', True),
-            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))
+    def test_repr_array_long(self, data):
+        # some arrays may be able to assert a ... in the repr
+        with pd.option_context('display.max_seq_items', 1):
+            result = repr(data)
 
-        integers = (left.dtypes == 'integer').index
+            assert '...' in result
+            assert 'length' in result
 
-        for col in integers:
-            self.assert_series_equal(left[col], right[col],
-                                     *args, **kwargs)
 
-        left = left.drop(columns=integers)
-        right = right.drop(columns=integers)
-        tm.assert_frame_equal(left, right, *args, **kwargs)
+class TestConstructors(object):
 
+    def test_from_dtype_from_float(self, data):
+        # construct from our dtype & string dtype
+        dtype = data.dtype
 
-class TestDtype(BaseInteger, base.BaseDtypeTests):
+        # from float
+        expected = pd.Series(data)
+        result = pd.Series(np.array(data).astype('float'), dtype=str(dtype))
+        tm.assert_series_equal(result, expected)
 
-    @pytest.mark.skip(reason="using multiple dtypes")
-    def test_is_dtype_unboxes_dtype(self):
-        # we have multiple dtypes, so skip
-        pass
+        # from int / list
+        expected = pd.Series(data)
+        result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
+        tm.assert_series_equal(result, expected)
 
-    def test_array_type_with_arg(self, data, dtype):
-        assert dtype.construct_array_type() is IntegerArray
+        # from int / array
+        expected = pd.Series(data).dropna().reset_index(drop=True)
+        dropped = np.array(data.dropna()).astype(np.dtype((dtype.type)))
+        result = pd.Series(dropped, dtype=str(dtype))
+        tm.assert_series_equal(result, expected)
 
 
-class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests):
+class TestArithmeticOps(BaseOpsUtil):
 
     def _check_divmod_op(self, s, op, other, exc=None):
         super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None)
@@ -178,7 +139,7 @@ def _check_op_float(self, result, expected, mask, s, op_name, other):
         # check comparisions that are resulting in float dtypes
 
         expected[mask] = np.nan
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
     def _check_op_integer(self, result, expected, mask, s, op_name, other):
         # check comparisions that are resulting in integer dtypes
@@ -231,10 +192,10 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other):
 
             original = original.astype('float')
             original[mask] = np.nan
-            self.assert_series_equal(original, expected.astype('float'))
+            tm.assert_series_equal(original, expected.astype('float'))
 
         # assert our expected result
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
     def test_arith_integer_array(self, data, all_arithmetic_operators):
         # we operate with a rhs of an integer array
@@ -319,7 +280,7 @@ def test_error(self, data, all_arithmetic_operators):
             opa(np.arange(len(s)).reshape(-1, len(s)))
 
 
-class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests):
+class TestComparisonOps(BaseOpsUtil):
 
     def _compare_other(self, s, data, op_name, other):
         op = self.get_op_from_name(op_name)
@@ -345,144 +306,21 @@ def _compare_other(self, s, data, op_name, other):
 
         tm.assert_series_equal(result, expected)
 
+    def test_compare_scalar(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        self._compare_other(s, data, op_name, 0)
 
-class TestInterface(BaseInteger, base.BaseInterfaceTests):
-
-    def test_repr_array(self, data):
-        result = repr(data)
-
-        # not long
-        assert '...' not in result
-
-        assert 'dtype=' in result
-        assert 'IntegerArray' in result
-
-    def test_repr_array_long(self, data):
-        # some arrays may be able to assert a ... in the repr
-        with pd.option_context('display.max_seq_items', 1):
-            result = repr(data)
-
-            assert '...' in result
-            assert 'length' in result
-
-
-class TestConstructors(BaseInteger, base.BaseConstructorsTests):
-
-    def test_from_dtype_from_float(self, data):
-        # construct from our dtype & string dtype
-        dtype = data.dtype
-
-        # from float
-        expected = pd.Series(data)
-        result = pd.Series(np.array(data).astype('float'), dtype=str(dtype))
-        self.assert_series_equal(result, expected)
-
-        # from int / list
-        expected = pd.Series(data)
-        result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
-        self.assert_series_equal(result, expected)
-
-        # from int / array
-        expected = pd.Series(data).dropna().reset_index(drop=True)
-        dropped = np.array(data.dropna()).astype(np.dtype((dtype.type)))
-        result = pd.Series(dropped, dtype=str(dtype))
-        self.assert_series_equal(result, expected)
-
-
-class TestReshaping(BaseInteger, base.BaseReshapingTests):
-
-    def test_concat_mixed_dtypes(self, data):
-        # https://github.com/pandas-dev/pandas/issues/20762
-        df1 = pd.DataFrame({'A': data[:3]})
-        df2 = pd.DataFrame({"A": [1, 2, 3]})
-        df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
-        df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])})
-        dfs = [df1, df2, df3, df4]
-
-        # dataframes
-        result = pd.concat(dfs)
-        expected = pd.concat([x.astype(object) for x in dfs])
-        self.assert_frame_equal(result, expected)
-
-        # series
-        result = pd.concat([x['A'] for x in dfs])
-        expected = pd.concat([x['A'].astype(object) for x in dfs])
-        self.assert_series_equal(result, expected)
-
-        result = pd.concat([df1, df2])
-        expected = pd.concat([df1.astype('object'), df2.astype('object')])
-        self.assert_frame_equal(result, expected)
-
-        # concat of an Integer and Int coerces to object dtype
-        # TODO(jreback) once integrated this would
-        # be a result of Integer
-        result = pd.concat([df1['A'], df2['A']])
-        expected = pd.concat([df1['A'].astype('object'),
-                              df2['A'].astype('object')])
-        self.assert_series_equal(result, expected)
-
-
-class TestGetitem(BaseInteger, base.BaseGetitemTests):
-    pass
+    def test_compare_array(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        other = pd.Series([0] * len(data))
+        self._compare_other(s, data, op_name, other)
 
 
-class TestMissing(BaseInteger, base.BaseMissingTests):
+class TestCasting(object):
     pass
 
-
-class TestMethods(BaseInteger, base.BaseMethodsTests):
-
-    @pytest.mark.parametrize('dropna', [True, False])
-    def test_value_counts(self, all_data, dropna):
-        all_data = all_data[:10]
-        if dropna:
-            other = np.array(all_data[~all_data.isna()])
-        else:
-            other = all_data
-
-        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
-        expected = pd.Series(other).value_counts(
-            dropna=dropna).sort_index()
-        expected.index = expected.index.astype(all_data.dtype)
-
-        self.assert_series_equal(result, expected)
-
-    def test_combine_add(self, data_repeated):
-        # GH 20825
-        orig_data1, orig_data2 = data_repeated(2)
-        s1 = pd.Series(orig_data1)
-        s2 = pd.Series(orig_data2)
-
-        # fundamentally this is not a great operation
-        # as overflow / underflow can easily happen here
-        # e.g. int8 + int8
-        def scalar_add(a, b):
-
-            # TODO; should really be a type specific NA
-            if pd.isna(a) or pd.isna(b):
-                return np.nan
-            if is_integer(a):
-                a = int(a)
-            elif is_integer(b):
-                b = int(b)
-            return a + b
-
-        result = s1.combine(s2, scalar_add)
-        expected = pd.Series(
-            orig_data1._from_sequence([scalar_add(a, b) for (a, b) in
-                                       zip(orig_data1,
-                                           orig_data2)]))
-        self.assert_series_equal(result, expected)
-
-        val = s1.iloc[0]
-        result = s1.combine(val, lambda x1, x2: x1 + x2)
-        expected = pd.Series(
-            orig_data1._from_sequence([a + val for a in list(orig_data1)]))
-        self.assert_series_equal(result, expected)
-
-
-class TestCasting(BaseInteger, base.BaseCastingTests):
-
     @pytest.mark.parametrize('dropna', [True, False])
     def test_construct_index(self, all_data, dropna):
         # ensure that we do not coerce to Float64Index, rather
@@ -497,7 +335,7 @@ def test_construct_index(self, all_data, dropna):
         result = pd.Index(integer_array(other, dtype=all_data.dtype))
         expected = pd.Index(other, dtype=object)
 
-        self.assert_index_equal(result, expected)
+        tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize('dropna', [True, False])
     def test_astype_index(self, all_data, dropna):
@@ -515,7 +353,7 @@ def test_astype_index(self, all_data, dropna):
 
         result = idx.astype(dtype)
         expected = idx.astype(object).astype(dtype)
-        self.assert_index_equal(result, expected)
+        tm.assert_index_equal(result, expected)
 
     def test_astype(self, all_data):
         all_data = all_data[:10]
@@ -528,13 +366,13 @@ def test_astype(self, all_data):
         s = pd.Series(ints)
         result = s.astype(all_data.dtype)
         expected = pd.Series(ints)
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
         # coerce to same other - ints
         s = pd.Series(ints)
         result = s.astype(dtype)
         expected = pd.Series(ints, dtype=dtype)
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
         # coerce to same numpy_dtype - ints
         s = pd.Series(ints)
@@ -547,13 +385,13 @@ def test_astype(self, all_data):
         s = pd.Series(mixed)
         result = s.astype(all_data.dtype)
         expected = pd.Series(mixed)
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
         # coerce to same other - mixed
         s = pd.Series(mixed)
         result = s.astype(dtype)
         expected = pd.Series(mixed, dtype=dtype)
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
         # coerce to same numpy_dtype - mixed
         s = pd.Series(mixed)
@@ -572,12 +410,12 @@ def test_astype_specific_casting(self, dtype):
         s = pd.Series([1, 2, 3], dtype='Int64')
         result = s.astype(dtype)
         expected = pd.Series([1, 2, 3], dtype=dtype)
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
         s = pd.Series([1, 2, 3, None], dtype='Int64')
         result = s.astype(dtype)
         expected = pd.Series([1, 2, 3, None], dtype=dtype)
-        self.assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
     def test_construct_cast_invalid(self, dtype):
 
@@ -597,24 +435,6 @@ def test_construct_cast_invalid(self, dtype):
             pd.Series(arr).astype(dtype)
 
 
-class TestGroupby(BaseInteger, base.BaseGroupbyTests):
-
-    @pytest.mark.xfail(reason="groupby not working", strict=True)
-    def test_groupby_extension_no_sort(self, data_for_grouping):
-        super(TestGroupby, self).test_groupby_extension_no_sort(
-            data_for_grouping)
-
-    @pytest.mark.parametrize('as_index', [
-        pytest.param(True,
-                     marks=pytest.mark.xfail(reason="groupby not working",
-                                             strict=True)),
-        False
-    ])
-    def test_groupby_extension_agg(self, as_index, data_for_grouping):
-        super(TestGroupby, self).test_groupby_extension_agg(
-            as_index, data_for_grouping)
-
-
 def test_frame_repr(data_missing):
 
     df = pd.DataFrame({'A': data_missing})
diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py
new file mode 100644
index 0000000000000..bcf4cea795978
--- /dev/null
+++ b/pandas/tests/arrays/test_interval.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+import pytest
+import numpy as np
+
+from pandas import Index, IntervalIndex, date_range, timedelta_range
+from pandas.core.arrays import IntervalArray
+import pandas.util.testing as tm
+
+
+@pytest.fixture(params=[
+    (Index([0, 2, 4]), Index([1, 3, 5])),
+    (Index([0., 1., 2.]), Index([1., 2., 3.])),
+    (timedelta_range('0 days', periods=3),
+     timedelta_range('1 day', periods=3)),
+    (date_range('20170101', periods=3), date_range('20170102', periods=3)),
+    (date_range('20170101', periods=3, tz='US/Eastern'),
+     date_range('20170102', periods=3, tz='US/Eastern'))],
+    ids=lambda x: str(x[0].dtype))
+def left_right_dtypes(request):
+    """
+    Fixture for building an IntervalArray from various dtypes
+    """
+    return request.param
+
+
+class TestMethods(object):
+
+    @pytest.mark.parametrize('repeats', [0, 1, 5])
+    def test_repeat(self, left_right_dtypes, repeats):
+        left, right = left_right_dtypes
+        result = IntervalArray.from_arrays(left, right).repeat(repeats)
+        expected = IntervalArray.from_arrays(
+            left.repeat(repeats), right.repeat(repeats))
+        tm.assert_extension_array_equal(result, expected)
+
+    @pytest.mark.parametrize('bad_repeats, msg', [
+        (-1, 'negative dimensions are not allowed'),
+        ('foo', r'invalid literal for (int|long)\(\) with base 10')])
+    def test_repeat_errors(self, bad_repeats, msg):
+        array = IntervalArray.from_breaks(range(4))
+        with tm.assert_raises_regex(ValueError, msg):
+            array.repeat(bad_repeats)
+
+    @pytest.mark.parametrize('new_closed', [
+        'left', 'right', 'both', 'neither'])
+    def test_set_closed(self, closed, new_closed):
+        # GH 21670
+        array = IntervalArray.from_breaks(range(10), closed=closed)
+        result = array.set_closed(new_closed)
+        expected = IntervalArray.from_breaks(range(10), closed=new_closed)
+        tm.assert_extension_array_equal(result, expected)
+
+
+class TestSetitem(object):
+
+    def test_set_na(self, left_right_dtypes):
+        left, right = left_right_dtypes
+        result = IntervalArray.from_arrays(left, right)
+        result[0] = np.nan
+
+        expected_left = Index([left._na_value] + list(left[1:]))
+        expected_right = Index([right._na_value] + list(right[1:]))
+        expected = IntervalArray.from_arrays(expected_left, expected_right)
+
+        tm.assert_extension_array_equal(result, expected)
+
+
+def test_repr_matches():
+    idx = IntervalIndex.from_breaks([1, 2, 3])
+    a = repr(idx)
+    b = repr(idx.values)
+    assert a.replace("Index", "Array") == b
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index c8656808739c4..4e7886dd2e943 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -127,10 +127,11 @@ def test_combine_add(self, data_repeated):
         s1 = pd.Series(orig_data1)
         s2 = pd.Series(orig_data2)
         result = s1.combine(s2, lambda x1, x2: x1 + x2)
-        expected = pd.Series(
-            orig_data1._from_sequence([a + b for (a, b) in
-                                       zip(list(orig_data1),
-                                           list(orig_data2))]))
+        with np.errstate(over='ignore'):
+            expected = pd.Series(
+                orig_data1._from_sequence([a + b for (a, b) in
+                                           zip(list(orig_data1),
+                                               list(orig_data2))]))
         self.assert_series_equal(result, expected)
 
         val = s1.iloc[0]
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
index f7bfdb8ec218a..05351c56862b8 100644
--- a/pandas/tests/extension/base/ops.py
+++ b/pandas/tests/extension/base/ops.py
@@ -23,9 +23,9 @@ def get_op_from_name(self, op_name):
     def check_opname(self, s, op_name, other, exc=NotImplementedError):
         op = self.get_op_from_name(op_name)
 
-        self._check_op(s, op, other, exc)
+        self._check_op(s, op, other, op_name, exc)
 
-    def _check_op(self, s, op, other, exc=NotImplementedError):
+    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
         if exc is None:
             result = op(s, other)
             expected = s.combine(other, op)
@@ -69,7 +69,8 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
         # ndarray & other series
         op_name = all_arithmetic_operators
         s = pd.Series(data)
-        self.check_opname(s, op_name, [s.iloc[0]] * len(s), exc=TypeError)
+        self.check_opname(s, op_name, pd.Series([s.iloc[0]] * len(s)),
+                          exc=TypeError)
 
     def test_divmod(self, data):
         s = pd.Series(data)
@@ -113,5 +114,5 @@ def test_compare_scalar(self, data, all_compare_operators):
     def test_compare_array(self, data, all_compare_operators):
         op_name = all_compare_operators
         s = pd.Series(data)
-        other = [0] * len(data)
+        other = pd.Series([data[0]] * len(data))
         self._compare_other(s, data, op_name, other)
diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/pandas/tests/extension/interval/__init__.py b/pandas/tests/extension/interval/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/test_categorical.py
similarity index 85%
rename from pandas/tests/extension/category/test_categorical.py
rename to pandas/tests/extension/test_categorical.py
index 76f6b03907ef8..b8c73a9efdae8 100644
--- a/pandas/tests/extension/category/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -1,3 +1,18 @@
+"""
+This file contains a minimal set of tests for compliance with the extension
+array interface test suite, and should contain no other tests.
+The test suite for the full functionality of the array is located in
+`pandas/tests/arrays/`.
+
+The tests in this file are inherited from the BaseExtensionTests, and only
+minimal tweaks should be applied to get the tests passing (by overriding a
+parent method).
+
+Additional tests should either be added to one of the BaseExtensionTests
+classes (if they are relevant for the extension interface for all dtypes), or
+be added to the array-specific tests in `pandas/tests/arrays/`.
+
+"""
 import string
 
 import pytest
@@ -204,10 +219,14 @@ class TestComparisonOps(base.BaseComparisonOpsTests):
     def _compare_other(self, s, data, op_name, other):
         op = self.get_op_from_name(op_name)
         if op_name == '__eq__':
-            assert not op(data, other).all()
+            result = op(s, other)
+            expected = s.combine(other, lambda x, y: x == y)
+            assert (result == expected).all()
 
         elif op_name == '__ne__':
-            assert op(data, other).all()
+            result = op(s, other)
+            expected = s.combine(other, lambda x, y: x != y)
+            assert (result == expected).all()
 
         else:
             with pytest.raises(TypeError):
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
new file mode 100644
index 0000000000000..50c0e6dd8b347
--- /dev/null
+++ b/pandas/tests/extension/test_integer.py
@@ -0,0 +1,229 @@
+"""
+This file contains a minimal set of tests for compliance with the extension
+array interface test suite, and should contain no other tests.
+The test suite for the full functionality of the array is located in
+`pandas/tests/arrays/`.
+
+The tests in this file are inherited from the BaseExtensionTests, and only
+minimal tweaks should be applied to get the tests passing (by overriding a
+parent method).
+
+Additional tests should either be added to one of the BaseExtensionTests
+classes (if they are relevant for the extension interface for all dtypes), or
+be added to the array-specific tests in `pandas/tests/arrays/`.
+
+"""
+import numpy as np
+import pandas as pd
+import pytest
+
+from pandas.tests.extension import base
+from pandas.core.dtypes.common import is_extension_array_dtype
+
+from pandas.core.arrays import IntegerArray, integer_array
+from pandas.core.arrays.integer import (
+    Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
+    UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
+
+
+def make_data():
+    return (list(range(1, 9)) + [np.nan] + list(range(10, 98))
+            + [np.nan] + [99, 100])
+
+
+@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
+                        UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
+def dtype(request):
+    return request.param()
+
+
+@pytest.fixture
+def data(dtype):
+    return integer_array(make_data(), dtype=dtype)
+
+
+@pytest.fixture
+def data_missing(dtype):
+    return integer_array([np.nan, 1], dtype=dtype)
+
+
+@pytest.fixture
+def data_repeated(data):
+    def gen(count):
+        for _ in range(count):
+            yield data
+    yield gen
+
+
+@pytest.fixture
+def data_for_sorting(dtype):
+    return integer_array([1, 2, 0], dtype=dtype)
+
+
+@pytest.fixture
+def data_missing_for_sorting(dtype):
+    return integer_array([1, np.nan, 0], dtype=dtype)
+
+
+@pytest.fixture
+def na_cmp():
+    # our NA value is np.nan, so two values match as NA when both are NaN
+    return lambda x, y: np.isnan(x) and np.isnan(y)
+
+
+@pytest.fixture
+def na_value():
+    return np.nan
+
+
+@pytest.fixture
+def data_for_grouping(dtype):
+    b = 1
+    a = 0
+    c = 2
+    na = np.nan
+    return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)
+
+
+class TestDtype(base.BaseDtypeTests):
+
+    @pytest.mark.skip(reason="using multiple dtypes")
+    def test_is_dtype_unboxes_dtype(self):
+        # we have multiple dtypes, so skip
+        pass
+
+    def test_array_type_with_arg(self, data, dtype):
+        assert dtype.construct_array_type() is IntegerArray
+
+
+class TestArithmeticOps(base.BaseArithmeticOpsTests):
+
+    def check_opname(self, s, op_name, other, exc=None):
+        # overriding to indicate ops don't raise an error
+        super(TestArithmeticOps, self).check_opname(s, op_name,
+                                                    other, exc=None)
+
+    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
+        if exc is None:
+            if s.dtype.is_unsigned_integer and (op_name == '__rsub__'):
+                # TODO see https://github.com/pandas-dev/pandas/issues/22023
+                pytest.skip("unsigned subtraction gives negative values")
+
+            if (hasattr(other, 'dtype')
+                    and not is_extension_array_dtype(other.dtype)
+                    and pd.api.types.is_integer_dtype(other.dtype)):
+                # other is np.int64 and would therefore always result in
+                # upcasting, so keeping other as same numpy_dtype
+                other = other.astype(s.dtype.numpy_dtype)
+
+            result = op(s, other)
+            expected = s.combine(other, op)
+
+            if op_name == '__rdiv__':
+                # combine is not giving the correct result for this case
+                pytest.skip("skipping reverse div in python 2")
+            elif op_name in ('__rtruediv__', '__truediv__', '__div__'):
+                expected = expected.astype(float)
+                if op_name == '__rtruediv__':
+                    # TODO reverse operators result in object dtype
+                    result = result.astype(float)
+            elif op_name.startswith('__r'):
+                # TODO reverse operators result in object dtype
+                # see https://github.com/pandas-dev/pandas/issues/22024
+                expected = expected.astype(s.dtype)
+                result = result.astype(s.dtype)
+            else:
+                # combine method results in the 'biggest' (int64) dtype
+                expected = expected.astype(s.dtype)
+                pass
+            if (op_name == '__rpow__') and isinstance(other, pd.Series):
+                # TODO pow on Int arrays gives different result with NA
+                # see https://github.com/pandas-dev/pandas/issues/22022
+                result = result.fillna(1)
+
+            self.assert_series_equal(result, expected)
+        else:
+            with pytest.raises(exc):
+                op(s, other)
+
+    def _check_divmod_op(self, s, op, other, exc=None):
+        super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None)
+
+    @pytest.mark.skip(reason="intNA does not error on ops")
+    def test_error(self, data, all_arithmetic_operators):
+        # other specific errors tested in the integer array specific tests
+        pass
+
+
+class TestComparisonOps(base.BaseComparisonOpsTests):
+
+    def check_opname(self, s, op_name, other, exc=None):
+        super(TestComparisonOps, self).check_opname(s, op_name,
+                                                    other, exc=None)
+
+    def _compare_other(self, s, data, op_name, other):
+        self.check_opname(s, op_name, other)
+
+
+class TestInterface(base.BaseInterfaceTests):
+    pass
+
+
+class TestConstructors(base.BaseConstructorsTests):
+    pass
+
+
+class TestReshaping(base.BaseReshapingTests):
+    pass
+
+    # for test_concat_mixed_dtypes test
+    # concat of an Integer and Int coerces to object dtype
+    # TODO(jreback) once integrated this would be a result of Integer
+
+
+class TestGetitem(base.BaseGetitemTests):
+    pass
+
+
+class TestMissing(base.BaseMissingTests):
+    pass
+
+
+class TestMethods(base.BaseMethodsTests):
+
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_value_counts(self, all_data, dropna):
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+
+        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
+        expected = pd.Series(other).value_counts(
+            dropna=dropna).sort_index()
+        expected.index = expected.index.astype(all_data.dtype)
+
+        self.assert_series_equal(result, expected)
+
+
+class TestCasting(base.BaseCastingTests):
+    pass
+
+
+class TestGroupby(base.BaseGroupbyTests):
+
+    @pytest.mark.xfail(reason="groupby not working", strict=True)
+    def test_groupby_extension_no_sort(self, data_for_grouping):
+        super(TestGroupby, self).test_groupby_extension_no_sort(
+            data_for_grouping)
+
+    @pytest.mark.parametrize('as_index', [
+        pytest.param(True,
+                     marks=pytest.mark.xfail(reason="groupby not working",
+                                             strict=True)),
+        False
+    ])
+    def test_groupby_extension_agg(self, as_index, data_for_grouping):
+        super(TestGroupby, self).test_groupby_extension_agg(
+            as_index, data_for_grouping)
diff --git a/pandas/tests/extension/interval/test_interval.py b/pandas/tests/extension/test_interval.py
similarity index 54%
rename from pandas/tests/extension/interval/test_interval.py
rename to pandas/tests/extension/test_interval.py
index a10a56ddfdfac..625619a90ed4c 100644
--- a/pandas/tests/extension/interval/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -1,7 +1,22 @@
+"""
+This file contains a minimal set of tests for compliance with the extension
+array interface test suite, and should contain no other tests.
+The test suite for the full functionality of the array is located in
+`pandas/tests/arrays/`.
+
+The tests in this file are inherited from the BaseExtensionTests, and only
+minimal tweaks should be applied to get the tests passing (by overriding a
+parent method).
+
+Additional tests should either be added to one of the BaseExtensionTests
+classes (if they are relevant for the extension interface for all dtypes), or
+be added to the array-specific tests in `pandas/tests/arrays/`.
+
+"""
 import pytest
 import numpy as np
 
-from pandas import Index, Interval, IntervalIndex, date_range, timedelta_range
+from pandas import Interval
 from pandas.core.arrays import IntervalArray
 from pandas.core.dtypes.dtypes import IntervalDtype
 from pandas.tests.extension import base
@@ -15,22 +30,6 @@ def make_data():
     return [Interval(l, r) for l, r in zip(left, right)]
 
 
-@pytest.fixture(params=[
-    (Index([0, 2, 4]), Index([1, 3, 5])),
-    (Index([0., 1., 2.]), Index([1., 2., 3.])),
-    (timedelta_range('0 days', periods=3),
-     timedelta_range('1 day', periods=3)),
-    (date_range('20170101', periods=3), date_range('20170102', periods=3)),
-    (date_range('20170101', periods=3, tz='US/Eastern'),
-     date_range('20170102', periods=3, tz='US/Eastern'))],
-    ids=lambda x: str(x[0].dtype))
-def left_right_dtypes(request):
-    """
-    Fixture for building an IntervalArray from various dtypes
-    """
-    return request.param
-
-
 @pytest.fixture
 def dtype():
     return IntervalDtype()
@@ -111,30 +110,6 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests):
 
 
 class TestMethods(BaseInterval, base.BaseMethodsTests):
-    @pytest.mark.parametrize('repeats', [0, 1, 5])
-    def test_repeat(self, left_right_dtypes, repeats):
-        left, right = left_right_dtypes
-        result = IntervalArray.from_arrays(left, right).repeat(repeats)
-        expected = IntervalArray.from_arrays(
-            left.repeat(repeats), right.repeat(repeats))
-        tm.assert_extension_array_equal(result, expected)
-
-    @pytest.mark.parametrize('bad_repeats, msg', [
-        (-1, 'negative dimensions are not allowed'),
-        ('foo', r'invalid literal for (int|long)\(\) with base 10')])
-    def test_repeat_errors(self, bad_repeats, msg):
-        array = IntervalArray.from_breaks(range(4))
-        with tm.assert_raises_regex(ValueError, msg):
-            array.repeat(bad_repeats)
-
-    @pytest.mark.parametrize('new_closed', [
-        'left', 'right', 'both', 'neither'])
-    def test_set_closed(self, closed, new_closed):
-        # GH 21670
-        array = IntervalArray.from_breaks(range(10), closed=closed)
-        result = array.set_closed(new_closed)
-        expected = IntervalArray.from_breaks(range(10), closed=new_closed)
-        tm.assert_extension_array_equal(result, expected)
 
     @pytest.mark.skip(reason='addition is not defined for intervals')
     def test_combine_add(self, data_repeated):
@@ -173,21 +148,4 @@ class TestReshaping(BaseInterval, base.BaseReshapingTests):
 
 
 class TestSetitem(BaseInterval, base.BaseSetitemTests):
-
-    def test_set_na(self, left_right_dtypes):
-        left, right = left_right_dtypes
-        result = IntervalArray.from_arrays(left, right)
-        result[0] = np.nan
-
-        expected_left = Index([left._na_value] + list(left[1:]))
-        expected_right = Index([right._na_value] + list(right[1:]))
-        expected = IntervalArray.from_arrays(expected_left, expected_right)
-
-        self.assert_extension_array_equal(result, expected)
-
-
-def test_repr_matches():
-    idx = IntervalIndex.from_breaks([1, 2, 3])
-    a = repr(idx)
-    b = repr(idx.values)
-    assert a.replace("Index", "Array") == b
+    pass