diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 9662c59dddf4c..d0caeb3333548 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -228,6 +228,11 @@ static PyObject *get_values(PyObject *obj) {
     PRINTMARK();
 
     if (values && !PyArray_CheckExact(values)) {
+
+        if (PyObject_HasAttrString(values, "to_numpy")) {
+            values = PyObject_CallMethod(values, "to_numpy", NULL);
+        }
+
         if (PyObject_HasAttrString(values, "values")) {
             PyObject *subvals = get_values(values);
             PyErr_Clear();
@@ -279,8 +284,8 @@ static PyObject *get_values(PyObject *obj) {
             repr = PyString_FromString("<unknown dtype>");
         }
 
-        PyErr_Format(PyExc_ValueError, "%s or %s are not JSON serializable yet",
-                     PyString_AS_STRING(repr), PyString_AS_STRING(typeRepr));
+        PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet",
+                     repr, typeRepr);
         Py_DECREF(repr);
         Py_DECREF(typeRepr);
 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index ab5621d857e89..89fc90589a6e4 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -414,6 +414,17 @@ def _formatter(self, boxed=False):
     # ----------------------------------------------------------------
     # Array-Like / EA-Interface Methods
 
+    @property
+    def transpose(self):
+        # no-op because we are always 1-D
+        return self
+
+    T = transpose
+
+    def ravel(self, method=None):
+        # no-op because we are always 1-D
+        return self
+
     @property
     def nbytes(self):
         return self._data.nbytes
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 3f32b7b7dcea9..8fefbefc09eef 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -19,7 +19,7 @@
     is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
     is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
-from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
+from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCPandasArray
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import ops
@@ -240,11 +240,14 @@ def _simple_new(cls, values, freq=None, tz=None):
         result._dtype = dtype
         return result
 
-    def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
-                dayfirst=False, yearfirst=False, ambiguous='raise'):
-        return cls._from_sequence(
+    def __init__(self, values, freq=None, tz=None, dtype=None, copy=False,
+                 dayfirst=False, yearfirst=False, ambiguous='raise'):
+        result = type(self)._from_sequence(
             values, freq=freq, tz=tz, dtype=dtype, copy=copy,
             dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
+        self._data = result._data
+        self._freq = result._freq
+        self._dtype = result._dtype
 
     @classmethod
     def _from_sequence(cls, data, dtype=None, copy=False,
@@ -523,7 +526,9 @@ def _ndarray_values(self):
 
     @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
     def _validate_fill_value(self, fill_value):
-        if isna(fill_value):
+        if isna(fill_value) or fill_value == iNaT:
+            # FIXME: shouldn't allow iNaT through here; see discussion
+            #  in GH#24024
             fill_value = iNaT
         elif isinstance(fill_value, (datetime, np.datetime64)):
             self._assert_tzawareness_compat(fill_value)
@@ -1568,6 +1573,8 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
         copy = False
     elif isinstance(data, ABCSeries):
         data = data._values
+    elif isinstance(data, ABCPandasArray):
+        data = data.to_numpy()
 
     if hasattr(data, "freq"):
         # i.e. DatetimeArray/Index
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index e1141c6b6b3a8..293ce7d8e4aca 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -10,8 +10,7 @@
 
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, ExtensionDtype,
-    IntervalDtype, PandasExtensionDtype, PeriodDtype, _pandas_registry,
-    registry)
+    IntervalDtype, PandasExtensionDtype, PeriodDtype, registry)
 from pandas.core.dtypes.generic import (
     ABCCategorical, ABCCategoricalIndex, ABCDateOffset, ABCDatetimeIndex,
     ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries, ABCSparseArray,
@@ -1984,7 +1983,7 @@ def pandas_dtype(dtype):
         return dtype
 
     # registered extension types
-    result = _pandas_registry.find(dtype) or registry.find(dtype)
+    result = registry.find(dtype)
     if result is not None:
         return result
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 0501889d743d4..e6967ed2a4d3d 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -12,8 +12,8 @@
     is_extension_array_dtype, is_interval_dtype, is_object_dtype,
     is_period_dtype, is_sparse, is_timedelta64_dtype)
 from pandas.core.dtypes.generic import (
-    ABCDatetimeIndex, ABCPeriodIndex, ABCRangeIndex, ABCSparseDataFrame,
-    ABCTimedeltaIndex)
+    ABCDatetimeArray, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex,
+    ABCRangeIndex, ABCSparseDataFrame, ABCTimedeltaIndex)
 
 from pandas import compat
 
@@ -471,7 +471,15 @@ def _concat_datetimetz(to_concat, name=None):
     all inputs must be DatetimeIndex
     it is used in DatetimeIndex.append also
     """
-    return to_concat[0]._concat_same_dtype(to_concat, name=name)
+    # Right now, internals will pass a List[DatetimeArray] here
+    # for reductions like quantile. I would like to disentangle
+    # all this before we get here.
+    sample = to_concat[0]
+
+    if isinstance(sample, ABCIndexClass):
+        return sample._concat_same_dtype(to_concat, name=name)
+    elif isinstance(sample, ABCDatetimeArray):
+        return sample._concat_same_type(to_concat)
 
 
 def _concat_index_same_dtype(indexes, klass=None):
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index e0d0cf3393dd5..b4a46ef049809 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -479,8 +479,8 @@ def _is_boolean(self):
         return is_bool_dtype(self.categories)
 
 
-class DatetimeTZDtype(PandasExtensionDtype):
-
+@register_extension_dtype
+class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):
     """
     A np.dtype duck-typed class, suitable for holding a custom datetime with tz
     dtype.
@@ -493,6 +493,7 @@ class DatetimeTZDtype(PandasExtensionDtype):
     str = '|M8[ns]'
     num = 101
     base = np.dtype('M8[ns]')
+    na_value = NaT
     _metadata = ('unit', 'tz')
     _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
     _cache = {}
@@ -570,8 +571,8 @@ def construct_array_type(cls):
         -------
         type
         """
-        from pandas import DatetimeIndex
-        return DatetimeIndex
+        from pandas.core.arrays import DatetimeArrayMixin
+        return DatetimeArrayMixin
 
     @classmethod
     def construct_from_string(cls, string):
@@ -885,10 +886,3 @@ def is_dtype(cls, dtype):
             else:
                 return False
         return super(IntervalDtype, cls).is_dtype(dtype)
-
-
-# TODO(Extension): remove the second registry once all internal extension
-# dtypes are real extension dtypes.
-_pandas_registry = Registry()
-
-_pandas_registry.register(DatetimeTZDtype)
diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py
index bbc447d6fa0da..5c659b35bd0cc 100644
--- a/pandas/core/dtypes/generic.py
+++ b/pandas/core/dtypes/generic.py
@@ -67,6 +67,8 @@ def _check(cls, inst):
                                            ("extension",
                                             "categorical",
                                             "periodarray",
+                                            "datetimearray",
+                                            "timedeltaarray",
                                             "npy_extension",
                                             ))
 ABCPandasArray = create_pandas_abc_type("ABCPandasArray",
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index c9ed2521676ad..fca789030f9a4 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -35,11 +35,11 @@
 
 import pandas.core.algorithms as algos
 from pandas.core.arrays import (
-    Categorical, DatetimeArrayMixin as DatetimeArray, ExtensionArray)
+    Categorical, DatetimeArrayMixin as DatetimeArray, ExtensionArray,
+    TimedeltaArrayMixin as TimedeltaArray)
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.core.indexes.timedeltas import TimedeltaIndex
 from pandas.core.indexing import check_setitem_lengths
 from pandas.core.internals.arrays import extract_array
 import pandas.core.missing as missing
@@ -2169,7 +2169,7 @@ class DatetimeLikeBlockMixin(object):
 
     @property
     def _holder(self):
-        return DatetimeIndex
+        return DatetimeArray
 
     @property
     def _na_value(self):
@@ -2179,15 +2179,33 @@ def _na_value(self):
     def fill_value(self):
         return tslibs.iNaT
 
+    def to_dense(self):
+        # TODO(DatetimeBlock): remove
+        return np.asarray(self.values)
+
     def get_values(self, dtype=None):
         """
         return object dtype as boxed values, such as Timestamps/Timedelta
         """
         if is_object_dtype(dtype):
-            return lib.map_infer(self.values.ravel(),
-                                 self._box_func).reshape(self.values.shape)
+            values = self.values
+
+            if self.ndim > 1:
+                values = values.ravel()
+
+            values = lib.map_infer(values, self._box_func)
+
+            if self.ndim > 1:
+                values = values.reshape(self.values.shape)
+
+            return values
+
         return self.values
 
+    @property
+    def asi8(self):
+        return self.values.view('i8')
+
 
 class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
     __slots__ = ()
@@ -2198,13 +2216,15 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
     def __init__(self, values, placement, ndim=None):
         if values.dtype != _TD_DTYPE:
             values = conversion.ensure_timedelta64ns(values)
-
+        if isinstance(values, TimedeltaArray):
+            values = values._data
+        assert isinstance(values, np.ndarray), type(values)
         super(TimeDeltaBlock, self).__init__(values,
                                              placement=placement, ndim=ndim)
 
     @property
     def _holder(self):
-        return TimedeltaIndex
+        return TimedeltaArray
 
     @property
     def _box_func(self):
@@ -2299,6 +2319,9 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None,
                                        dtype=object)
         return rvalues
 
+    def external_values(self, dtype=None):
+        return np.asarray(self.values.astype("timedelta64[ns]", copy=False))
+
 
 class BoolBlock(NumericBlock):
     __slots__ = ()
@@ -2771,6 +2794,10 @@ def _maybe_coerce_values(self, values):
         """
         if values.dtype != _NS_DTYPE:
             values = conversion.ensure_datetime64ns(values)
+        if isinstance(values, DatetimeArray):
+            values = values._data
+
+        assert isinstance(values, np.ndarray), type(values)
         return values
 
     def _astype(self, dtype, **kwargs):
@@ -2857,15 +2884,17 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None,
         """ convert to our native types format, slicing if desired """
 
         values = self.values
+        i8values = self.asi8
+
         if slicer is not None:
-            values = values[..., slicer]
+            i8values = i8values[..., slicer]
 
         from pandas.io.formats.format import _get_format_datetime64_from_values
         format = _get_format_datetime64_from_values(values, date_format)
 
         result = tslib.format_array_from_datetime(
-            values.view('i8').ravel(), tz=getattr(self.values, 'tz', None),
-            format=format, na_rep=na_rep).reshape(values.shape)
+            i8values.ravel(), tz=getattr(self.values, 'tz', None),
+            format=format, na_rep=na_rep).reshape(i8values.shape)
         return np.atleast_2d(result)
 
     def should_store(self, value):
@@ -2885,12 +2914,16 @@ def set(self, locs, values, check=False):
 
         self.values[locs] = values
 
+    def external_values(self):
+        return np.asarray(self.values.astype('datetime64[ns]', copy=False))
 
-class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
+
+class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
     """ implement a datetime64 block with a tz attribute """
     __slots__ = ()
     _concatenator = staticmethod(_concat._concat_datetime)
     is_datetimetz = True
+    is_extension = True
 
     def __init__(self, values, placement, ndim=2, dtype=None):
         # XXX: This will end up calling _maybe_coerce_values twice
@@ -2905,6 +2938,10 @@ def __init__(self, values, placement, ndim=2, dtype=None):
         super(DatetimeTZBlock, self).__init__(values, placement=placement,
                                               ndim=ndim)
 
+    @property
+    def _holder(self):
+        return DatetimeArray
+
     def _maybe_coerce_values(self, values, dtype=None):
         """Input validation for values passed to __init__. Ensure that
         we have datetime64TZ, coercing if necessary.
@@ -2926,7 +2963,8 @@ def _maybe_coerce_values(self, values, dtype=None):
         if dtype is not None:
             if isinstance(dtype, compat.string_types):
                 dtype = DatetimeTZDtype.construct_from_string(dtype)
-            values = values._shallow_copy(tz=dtype.tz)
+            values = type(values)(values, dtype=dtype)
+            # TODO: shouldn't this be done via tz_convert?
 
         if values.tz is None:
             raise ValueError("cannot create a DatetimeTZBlock without a tz")
@@ -2936,8 +2974,8 @@ def _maybe_coerce_values(self, values, dtype=None):
     @property
     def is_view(self):
         """ return a boolean if I am possibly a view """
-        # check the ndarray values of the DatetimeIndex values
-        return self.values.values.base is not None
+        # check the ndarray values of the DatetimeArray values
+        return self.values._data.base is not None
 
     def copy(self, deep=True):
         """ copy constructor """
@@ -2946,18 +2984,40 @@ def copy(self, deep=True):
             values = values.copy(deep=True)
         return self.make_block_same_class(values)
 
-    def external_values(self):
-        """ we internally represent the data as a DatetimeIndex, but for
-        external compat with ndarray, export as a ndarray of Timestamps
+    def get_values(self, dtype=None):
         """
-        return self.values.astype('datetime64[ns]').values
+        Returns an ndarray of values.
 
-    def get_values(self, dtype=None):
+        Parameters
+        ----------
+        dtype : np.dtype
+            Only `object`-like dtypes are respected here (not sure
+            why).
+
+        Returns
+        -------
+        values : ndarray
+            When ``dtype=object``, then and object-dtype ndarray of
+            boxed values is returned. Otherwise, an M8[ns] ndarray
+            is returned.
+
+            DatetimeArray is always 1-d. ``get_values`` will reshape
+            the return value to be the same dimensionality as the
+            block.
+        """
         # return object dtype as Timestamps with the zones
+        values = self.values
         if is_object_dtype(dtype):
-            return lib.map_infer(
-                self.values.ravel(), self._box_func).reshape(self.values.shape)
-        return self.values
+            values = values._box_values(values._data)
+
+        values = np.asarray(values)
+
+        if self.ndim == 2:
+            # Ensure that our shape is correct for DataFrame.
+            # ExtensionArrays are always 1-D, even in a DataFrame when
+            # the analogous NumPy-backed column would be a 2-D ndarray.
+            values = values.reshape(1, -1)
+        return values
 
     def _slice(self, slicer):
         """ return a slice of my values """
@@ -2982,13 +3042,19 @@ def _try_coerce_args(self, values, other):
         base-type values, base-type other
         """
         # asi8 is a view, needs copy
-        values = _block_shape(values.asi8, ndim=self.ndim)
+        values = _block_shape(values.view("i8"), ndim=self.ndim)
 
         if isinstance(other, ABCSeries):
             other = self._holder(other)
 
         if isinstance(other, bool):
             raise TypeError
+        elif is_datetime64_dtype(other):
+            # add the tz back
+            # FIXME: we shouldn't be ravelling at this point, but Otherwise
+            #  this raises on tests.frame.test_quantile.test_quantile_box
+            other = self._holder(other.ravel(), dtype=self.dtype)
+
         elif (is_null_datelike_scalar(other) or
               (lib.is_scalar(other) and isna(other))):
             other = tslibs.iNaT
@@ -3023,7 +3089,8 @@ def _try_coerce_result(self, result):
                 result = result.reshape(np.prod(result.shape))
 
             # GH#24096 new values invalidates a frequency
-            result = self.values._shallow_copy(result, freq=None)
+            result = self._holder._simple_new(result, freq=None,
+                                              tz=self.values.tz)
 
         return result
 
@@ -3031,32 +3098,6 @@ def _try_coerce_result(self, result):
     def _box_func(self):
         return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz)
 
-    def shift(self, periods, axis=0, fill_value=None):
-        """ shift the block by periods """
-
-        # think about moving this to the DatetimeIndex. This is a non-freq
-        # (number of periods) shift ###
-
-        N = len(self)
-        indexer = np.zeros(N, dtype=int)
-        if periods > 0:
-            indexer[periods:] = np.arange(N - periods)
-        else:
-            indexer[:periods] = np.arange(-periods, N)
-
-        new_values = self.values.asi8.take(indexer)
-
-        if isna(fill_value):
-            fill_value = tslibs.iNaT
-        if periods > 0:
-            new_values[:periods] = fill_value
-        else:
-            new_values[periods:] = fill_value
-
-        new_values = self.values._shallow_copy(new_values)
-        return [self.make_block_same_class(new_values,
-                                           placement=self.mgr_locs)]
-
     def diff(self, n, axis=0):
         """1st discrete difference
 
@@ -3086,14 +3127,45 @@ def diff(self, n, axis=0):
         return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]
 
     def concat_same_type(self, to_concat, placement=None):
-        """
-        Concatenate list of single blocks of the same type.
-        """
-        values = self._concatenator([blk.values for blk in to_concat],
-                                    axis=self.ndim - 1)
-        # not using self.make_block_same_class as values can be non-tz dtype
-        return make_block(
-            values, placement=placement or slice(0, len(values), 1))
+        # need to handle concat([tz1, tz2]) here, since DatetimeArray
+        # only handles cases where all the tzs are the same.
+        # Instead of placing the condition here, it could also go into the
+        # is_uniform_join_units check, but I'm not sure what is better
+
+        if len({x.dtype for x in to_concat}) > 1:
+            values = _concat._concat_datetime([x.values for x in to_concat])
+            placement = placement or slice(0, len(values), 1)
+
+            if self.ndim > 1:
+                values = np.atleast_2d(values)
+            return ObjectBlock(values, ndim=self.ndim, placement=placement)
+        return super(DatetimeTZBlock, self).concat_same_type(to_concat,
+                                                             placement)
+
+    def fillna(self, value, limit=None, inplace=False, downcast=None):
+        # We support filling a DatetimeTZ with a `value` whose timezone
+        # is different by coercing to object.
+        try:
+            return super(DatetimeTZBlock, self).fillna(
+                value, limit, inplace, downcast
+            )
+        except (ValueError, TypeError):
+            # different timezones, or a non-tz
+            return self.astype(object).fillna(
+                value, limit=limit, inplace=inplace, downcast=downcast
+            )
+
+    def setitem(self, indexer, value):
+        # https://github.com/pandas-dev/pandas/issues/24020
+        # Need a dedicated setitem until #24020 (type promotion in setitem
+        # for extension arrays) is designed and implemented.
+        try:
+            return super(DatetimeTZBlock, self).setitem(indexer, value)
+        except (ValueError, TypeError):
+            newb = make_block(self.values.astype(object),
+                              placement=self.mgr_locs,
+                              klass=ObjectBlock,)
+            return newb.setitem(indexer, value)
 
 
 # -----------------------------------------------------------------
@@ -3115,8 +3187,16 @@ def get_block_type(values, dtype=None):
     dtype = dtype or values.dtype
     vtype = dtype.type
 
-    if is_categorical(values):
+    if is_sparse(dtype):
+        # Need this first(ish) so that Sparse[datetime] is sparse
+        cls = ExtensionBlock
+    elif is_categorical(values):
         cls = CategoricalBlock
+    elif issubclass(vtype, np.datetime64):
+        assert not is_datetime64tz_dtype(values)
+        cls = DatetimeBlock
+    elif is_datetime64tz_dtype(values):
+        cls = DatetimeTZBlock
     elif is_interval_dtype(dtype) or is_period_dtype(dtype):
         cls = ObjectValuesExtensionBlock
     elif is_extension_array_dtype(values):
@@ -3128,11 +3208,6 @@ def get_block_type(values, dtype=None):
         cls = TimeDeltaBlock
     elif issubclass(vtype, np.complexfloating):
         cls = ComplexBlock
-    elif issubclass(vtype, np.datetime64):
-        assert not is_datetime64tz_dtype(values)
-        cls = DatetimeBlock
-    elif is_datetime64tz_dtype(values):
-        cls = DatetimeTZBlock
     elif issubclass(vtype, np.integer):
         cls = IntBlock
     elif dtype == np.bool_:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index b18b966406bbb..b3c893c7d84be 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -589,7 +589,7 @@ def sanitize_array(data, index, dtype=None, copy=False,
         # everything else in this block must also handle ndarray's,
         # becuase we've unwrapped PandasArray into an ndarray.
 
-        if dtype is not None and not data.dtype.is_dtype(dtype):
+        if dtype is not None:
             subarr = data.astype(dtype)
 
         if copy:
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 7cab52ddda87f..3c966b036aac8 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1539,17 +1539,20 @@ def wrapper(left, right):
             raise TypeError("{typ} cannot perform the operation "
                             "{op}".format(typ=type(left).__name__, op=str_rep))
 
-        elif (is_extension_array_dtype(left) or
-                (is_extension_array_dtype(right) and not is_scalar(right))):
-            # GH#22378 disallow scalar to exclude e.g. "category", "Int64"
-            return dispatch_to_extension_op(op, left, right)
-
         elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
+            # Give dispatch_to_index_op a chance for tests like
+            # test_dt64_series_add_intlike, which the index dispatching handles
+            # specifically.
             result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
             return construct_result(left, result,
                                     index=left.index, name=res_name,
                                     dtype=result.dtype)
 
+        elif (is_extension_array_dtype(left) or
+              (is_extension_array_dtype(right) and not is_scalar(right))):
+            # GH#22378 disallow scalar to exclude e.g. "category", "Int64"
+            return dispatch_to_extension_op(op, left, right)
+
         elif is_timedelta64_dtype(left):
             result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex)
             return construct_result(left, result,
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 77dc04e9453a9..baffea3cd21d2 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -9,7 +9,7 @@
 
 from pandas.core.dtypes.dtypes import (
     DatetimeTZDtype, PeriodDtype,
-    IntervalDtype, CategoricalDtype, registry, _pandas_registry)
+    IntervalDtype, CategoricalDtype, registry)
 from pandas.core.dtypes.common import (
     is_categorical_dtype, is_categorical,
     is_datetime64tz_dtype, is_datetimetz,
@@ -804,14 +804,6 @@ def test_registry(dtype):
     assert dtype in registry.dtypes
 
 
-@pytest.mark.parametrize('dtype', [
-    DatetimeTZDtype,
-])
-def test_pandas_registry(dtype):
-    assert dtype not in registry.dtypes
-    assert dtype in _pandas_registry.dtypes
-
-
 @pytest.mark.parametrize('dtype, expected', [
     ('int64', None),
     ('interval', IntervalDtype()),
@@ -824,13 +816,6 @@ def test_registry_find(dtype, expected):
     assert registry.find(dtype) == expected
 
 
-@pytest.mark.parametrize('dtype, expected', [
-    ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')),
-])
-def test_pandas_registry_find(dtype, expected):
-    assert _pandas_registry.find(dtype) == expected
-
-
 @pytest.mark.parametrize('dtype, expected', [
     (str, False),
     (int, False),
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index d58b7ddf29123..bd50584406312 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -157,6 +157,12 @@ def astype(self, dtype, copy=True):
         # NumPy has issues when all the dicts are the same length.
         # np.array([UserDict(...), UserDict(...)]) fails,
         # but np.array([{...}, {...}]) works, so cast.
+
+        # needed to add this check for the Series constructor
+        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
+            if copy:
+                return self.copy()
+            return self
         return np.array([dict(x) for x in self], dtype=dtype, copy=copy)
 
     def unique(self):
diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py
index 2bc4bf5df2298..db3f3b80bca6b 100644
--- a/pandas/tests/extension/test_common.py
+++ b/pandas/tests/extension/test_common.py
@@ -77,14 +77,6 @@ def test_astype_no_copy():
     assert arr is not result
 
 
-@pytest.mark.parametrize('dtype', [
-    dtypes.DatetimeTZDtype('ns', 'US/Central'),
-])
-def test_is_not_extension_array_dtype(dtype):
-    assert not isinstance(dtype, dtypes.ExtensionDtype)
-    assert not is_extension_array_dtype(dtype)
-
-
 @pytest.mark.parametrize('dtype', [
     dtypes.CategoricalDtype(),
     dtypes.IntervalDtype(),
diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py
new file mode 100644
index 0000000000000..7c4491d6edbcf
--- /dev/null
+++ b/pandas/tests/extension/test_datetime.py
@@ -0,0 +1,237 @@
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.dtypes import DatetimeTZDtype
+
+import pandas as pd
+from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
+from pandas.tests.extension import base
+
+
+@pytest.fixture(params=["US/Central"])
+def dtype(request):
+    return DatetimeTZDtype(unit="ns", tz=request.param)
+
+
+@pytest.fixture
+def data(dtype):
+    data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz),
+                         dtype=dtype)
+    return data
+
+
+@pytest.fixture
+def data_missing(dtype):
+    return DatetimeArray(
+        np.array(['NaT', '2000-01-01'], dtype='datetime64[ns]'),
+        dtype=dtype
+    )
+
+
+@pytest.fixture
+def data_for_sorting(dtype):
+    a = pd.Timestamp('2000-01-01')
+    b = pd.Timestamp('2000-01-02')
+    c = pd.Timestamp('2000-01-03')
+    return DatetimeArray(np.array([b, c, a], dtype='datetime64[ns]'),
+                         dtype=dtype)
+
+
+@pytest.fixture
+def data_missing_for_sorting(dtype):
+    a = pd.Timestamp('2000-01-01')
+    b = pd.Timestamp('2000-01-02')
+    return DatetimeArray(np.array([b, 'NaT', a], dtype='datetime64[ns]'),
+                         dtype=dtype)
+
+
+@pytest.fixture
+def data_for_grouping(dtype):
+    """
+        Expected to be like [B, B, NA, NA, A, A, B, C]
+
+        Where A < B < C and NA is missing
+    """
+    a = pd.Timestamp('2000-01-01')
+    b = pd.Timestamp('2000-01-02')
+    c = pd.Timestamp('2000-01-03')
+    na = 'NaT'
+    return DatetimeArray(np.array([b, b, na, na, a, a, b, c],
+                                  dtype='datetime64[ns]'),
+                         dtype=dtype)
+
+
+@pytest.fixture
+def na_cmp():
+    def cmp(a, b):
+        return a is pd.NaT and a is b
+    return cmp
+
+
+@pytest.fixture
+def na_value():
+    return pd.NaT
+
+
+# ----------------------------------------------------------------------------
+class BaseDatetimeTests(object):
+    pass
+
+
+# ----------------------------------------------------------------------------
+# Tests
+class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests):
+    pass
+
+
+class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests):
+    pass
+
+
+class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests):
+    pass
+
+
+class TestMethods(BaseDatetimeTests, base.BaseMethodsTests):
+    @pytest.mark.skip(reason="Incorrect expected")
+    def test_value_counts(self, all_data, dropna):
+        pass
+
+    def test_combine_add(self, data_repeated):
+        # Timestamp.__add__(Timestamp) not defined
+        pass
+
+
+class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests):
+
+    def test_array_interface(self, data):
+        if data.tz:
+            # np.asarray(DTA) is currently always tz-naive.
+            pytest.skip("GH-23569")
+        else:
+            super(TestInterface, self).test_array_interface(data)
+
+
+class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests):
+    implements = {'__sub__', '__rsub__'}
+
+    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
+        if all_arithmetic_operators in self.implements:
+            s = pd.Series(data)
+            self.check_opname(s, all_arithmetic_operators, s.iloc[0],
+                              exc=None)
+        else:
+            # ... but not the rest.
+            super(TestArithmeticOps, self).test_arith_series_with_scalar(
+                data, all_arithmetic_operators
+            )
+
+    def test_add_series_with_extension_array(self, data):
+        # Datetime + Datetime not implemented
+        s = pd.Series(data)
+        msg = 'cannot add DatetimeArray(Mixin)? and DatetimeArray(Mixin)?'
+        with pytest.raises(TypeError, match=msg):
+            s + data
+
+    def test_arith_series_with_array(self, data, all_arithmetic_operators):
+        if all_arithmetic_operators in self.implements:
+            s = pd.Series(data)
+            self.check_opname(s, all_arithmetic_operators, s.iloc[0],
+                              exc=None)
+        else:
+            # ... but not the rest.
+            super(TestArithmeticOps, self).test_arith_series_with_scalar(
+                data, all_arithmetic_operators
+            )
+
+    def test_error(self, data, all_arithmetic_operators):
+        pass
+
+    @pytest.mark.xfail(reason="different implementation", strict=False)
+    def test_direct_arith_with_series_returns_not_implemented(self, data):
+        # Right now, we have trouble with this. Returning NotImplemented
+        # fails other tests like
+        # tests/arithmetic/test_datetime64::TestTimestampSeriesArithmetic::
+        # test_dt64_seris_add_intlike
+        return super(
+            TestArithmeticOps,
+            self
+        ).test_direct_arith_with_series_returns_not_implemented(data)
+
+
+class TestCasting(BaseDatetimeTests, base.BaseCastingTests):
+    pass
+
+
+class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests):
+
+    def _compare_other(self, s, data, op_name, other):
+        # the base test is not appropriate for us. We raise on comparison
+        # with (some) integers, depending on the value.
+        pass
+
+    @pytest.mark.xfail(reason="different implementation", strict=False)
+    def test_direct_arith_with_series_returns_not_implemented(self, data):
+        return super(
+            TestComparisonOps,
+            self
+        ).test_direct_arith_with_series_returns_not_implemented(data)
+
+
+class TestMissing(BaseDatetimeTests, base.BaseMissingTests):
+    pass
+
+
+class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests):
+
+    @pytest.mark.skip(reason="We have DatetimeTZBlock")
+    def test_concat(self, data, in_frame):
+        pass
+
+    def test_concat_mixed_dtypes(self, data):
+        # concat(Series[datetimetz], Series[category]) uses a
+        # plain np.array(values) on the DatetimeArray, which
+        # drops the tz.
+        super(TestReshaping, self).test_concat_mixed_dtypes(data)
+
+    @pytest.mark.parametrize("obj", ["series", "frame"])
+    def test_unstack(self, obj):
+        # GH-13287: can't use base test, since building the expected fails.
+        data = DatetimeArray._from_sequence(['2000', '2001', '2002', '2003'],
+                                            tz='US/Central')
+        index = pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]),
+                                           names=['a', 'b'])
+
+        if obj == "series":
+            ser = pd.Series(data, index=index)
+            expected = pd.DataFrame({
+                "A": data.take([0, 1]),
+                "B": data.take([2, 3])
+            }, index=pd.Index(['a', 'b'], name='b'))
+            expected.columns.name = 'a'
+
+        else:
+            ser = pd.DataFrame({"A": data, "B": data}, index=index)
+            expected = pd.DataFrame(
+                {("A", "A"): data.take([0, 1]),
+                 ("A", "B"): data.take([2, 3]),
+                 ("B", "A"): data.take([0, 1]),
+                 ("B", "B"): data.take([2, 3])},
+                index=pd.Index(['a', 'b'], name='b')
+            )
+            expected.columns.names = [None, 'a']
+
+        result = ser.unstack(0)
+        self.assert_equal(result, expected)
+
+
+class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests):
+    pass
+
+
+class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests):
+    pass
+
+
+class TestPrinting(BaseDatetimeTests, base.BasePrintingTests):
+    pass
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index a21d0104b0d04..b877ed93f07a2 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -3244,8 +3244,8 @@ def test_setitem(self):
         # are copies)
         b1 = df._data.blocks[1]
         b2 = df._data.blocks[2]
-        assert b1.values.equals(b2.values)
-        assert id(b1.values.values.base) != id(b2.values.values.base)
+        tm.assert_extension_array_equal(b1.values, b2.values)
+        assert id(b1.values._data.base) != id(b2.values._data.base)
 
         # with nan
         df2 = df.copy()
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 26cd39c4b807c..f7eb7e8443f55 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -11,8 +11,12 @@
 from distutils.version import LooseVersion
 import itertools
 from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex,
-                    Series, Categorical, TimedeltaIndex, SparseArray)
+                    Series, Categorical, SparseArray)
 from pandas.compat import OrderedDict, lrange
+from pandas.core.arrays import (
+    DatetimeArrayMixin as DatetimeArray,
+    TimedeltaArrayMixin as TimedeltaArray,
+)
 from pandas.core.internals import (SingleBlockManager,
                                    make_block, BlockManager)
 import pandas.core.algorithms as algos
@@ -290,7 +294,7 @@ def test_make_block_same_class(self):
         block = create_block('M8[ns, US/Eastern]', [3])
         with tm.assert_produces_warning(DeprecationWarning,
                                         check_stacklevel=False):
-            block.make_block_same_class(block.values.values,
+            block.make_block_same_class(block.values,
                                         dtype=block.values.dtype)
 
 
@@ -451,7 +455,7 @@ def test_copy(self, mgr):
                 assert cp_blk.values.base is blk.values.base
             else:
                 # DatetimeTZBlock has DatetimeIndex values
-                assert cp_blk.values.values.base is blk.values.values.base
+                assert cp_blk.values._data.base is blk.values._data.base
 
         cp = mgr.copy(deep=True)
         for blk, cp_blk in zip(mgr.blocks, cp.blocks):
@@ -460,7 +464,7 @@ def test_copy(self, mgr):
             # some blocks it is an array (e.g. datetimetz), but was copied
             assert cp_blk.equals(blk)
             if not isinstance(cp_blk.values, np.ndarray):
-                assert cp_blk.values.values.base is not blk.values.values.base
+                assert cp_blk.values._data.base is not blk.values._data.base
             else:
                 assert cp_blk.values.base is None and blk.values.base is None
 
@@ -1258,9 +1262,9 @@ def test_binop_other(self, op, value, dtype):
 
 @pytest.mark.parametrize('typestr, holder', [
     ('category', Categorical),
-    ('M8[ns]', DatetimeIndex),
-    ('M8[ns, US/Central]', DatetimeIndex),
-    ('m8[ns]', TimedeltaIndex),
+    ('M8[ns]', DatetimeArray),
+    ('M8[ns, US/Central]', DatetimeArray),
+    ('m8[ns]', TimedeltaArray),
     ('sparse', SparseArray),
 ])
 def test_holder(typestr, holder):
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 0706cb12ac5d0..4bc8d41d11823 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1016,13 +1016,17 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
         s = Series({'date': date, 'a': 1.0, 'b': 2.0})
         df = DataFrame(columns=['c', 'd'])
         result = df.append(s, ignore_index=True)
+        # n.b. it's not clear to me that expected is correct here.
+        # It's possible that the `date` column should have
+        # datetime64[ns, tz] dtype for both result and expected.
+        # that would be more consistent with new columns having
+        # their own dtype (float for a and b, datetime64ns, tz for date).
         expected = DataFrame([[np.nan, np.nan, 1., 2., date]],
-                             columns=['c', 'd', 'a', 'b', 'date'])
+                             columns=['c', 'd', 'a', 'b', 'date'],
+                             dtype=object)
         # These columns get cast to object after append
-        object_cols = ['c', 'd', 'date']
-        expected.loc[:, object_cols] = expected.loc[:, object_cols].astype(
-            object
-        )
+        expected['a'] = expected['a'].astype(float)
+        expected['b'] = expected['b'].astype(float)
         assert_frame_equal(result, expected)