pandas-dev
diff --git a/‎doc/source/cookbook.rst
+33-29 b/‎doc/source/cookbook.rst
+33-29
diff --git a/‎pandas/api/extensions/__init__.py
+2-2 b/‎pandas/api/extensions/__init__.py
+2-2
diff --git a/‎pandas/compat/pickle_compat.py
+3-1 b/‎pandas/compat/pickle_compat.py
+3-1
diff --git a/‎pandas/core/arrays/__init__.py
+1 b/‎pandas/core/arrays/__init__.py
+1
diff --git a/‎pandas/core/sparse/array.py renamed to ‎pandas/core/arrays/sparse.py
+250-2 b/‎pandas/core/sparse/array.py renamed to ‎pandas/core/arrays/sparse.py
+250-2
@@ -1228,36 +1228,40 @@ Correlation
 
 The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types.  Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a `DataFrame` object.
 
-.. ipython:: python
-
-    def distcorr(x, y):
-        n = len(x)
-        a = np.zeros(shape=(n, n))
-        b = np.zeros(shape=(n, n))
-
-        for i in range(n):
-            for j in range(i + 1, n):
-                a[i, j] = abs(x[i] - x[j])
-                b[i, j] = abs(y[i] - y[j])
-
-        a += a.T
-        b += b.T
-
-        a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
-        b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
-
-        A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
-        B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
-
-        cov_ab = np.sqrt(np.nansum(A * B)) / n
-        std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
-        std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
-
-        return cov_ab / std_a / std_b
-
-    df = pd.DataFrame(np.random.normal(size=(100, 3)))
+.. code-block:: python
 
-    df.corr(method=distcorr)
+    >>> def distcorr(x, y):
+    ...     n = len(x)
+    ...     a = np.zeros(shape=(n, n))
+    ...     b = np.zeros(shape=(n, n))
+    ...
+    ...     for i in range(n):
+    ...         for j in range(i + 1, n):
+    ...             a[i, j] = abs(x[i] - x[j])
+    ...             b[i, j] = abs(y[i] - y[j])
+    ...
+    ...     a += a.T
+    ...     b += b.T
+    ...
+    ...     a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
+    ...     b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
+    ...
+    ...     A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
+    ...     B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
+    ...
+    ...     cov_ab = np.sqrt(np.nansum(A * B)) / n
+    ...     std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
+    ...     std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
+    ...
+    ...     return cov_ab / std_a / std_b
+    ...
+    >>> df = pd.DataFrame(np.random.normal(size=(100, 3)))
+    ...
+    >>> df.corr(method=distcorr)
+              0         1         2
+    0  1.000000  0.171368  0.145302
+    1  0.171368  1.000000  0.189919
+    2  0.145302  0.189919  1.000000
 
 Timedeltas
 ----------
 
@@ -3,8 +3,8 @@
                                   register_index_accessor,
                                   register_series_accessor)
 from pandas.core.algorithms import take  # noqa
-from pandas.core.arrays.base import (ExtensionArray,    # noqa
-                                     ExtensionScalarOpsMixin)
+from pandas.core.arrays import (ExtensionArray,    # noqa
+                                ExtensionScalarOpsMixin)
 from pandas.core.dtypes.dtypes import (  # noqa
     ExtensionDtype, register_extension_dtype
 )
@@ -56,6 +56,8 @@ def load_reduce(self):
 
 # If classes are moved, provide compat here.
 _class_locations_map = {
+    ('pandas.core.sparse.array', 'SparseArray'):
+        ('pandas.core.arrays', 'SparseArray'),
 
     # 15477
     ('pandas.core.base', 'FrozenNDArray'):
@@ -88,7 +90,7 @@ def load_reduce(self):
 
     # 15998 top-level dirs moving
     ('pandas.sparse.array', 'SparseArray'):
-        ('pandas.core.sparse.array', 'SparseArray'),
+        ('pandas.core.arrays.sparse', 'SparseArray'),
     ('pandas.sparse.series', 'SparseSeries'):
         ('pandas.core.sparse.series', 'SparseSeries'),
     ('pandas.sparse.frame', 'SparseDataFrame'):
 
@@ -8,3 +8,4 @@
 from .timedeltas import TimedeltaArrayMixin  # noqa
 from .integer import (  # noqa
     IntegerArray, integer_array)
+from .sparse import SparseArray  # noqa
@@ -4,6 +4,7 @@
 from __future__ import division
 # pylint: disable=E1101,E1103,W0231
 
+import re
 import operator
 import numbers
 import numpy as np
@@ -16,8 +17,10 @@
 from pandas.errors import PerformanceWarning
 from pandas.compat.numpy import function as nv
 
-from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
+from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
 import pandas.core.common as com
+from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.generic import (
     ABCSparseSeries, ABCSeries, ABCIndexClass
 )
@@ -45,7 +48,252 @@
 import pandas.core.algorithms as algos
 import pandas.io.formats.printing as printing
 
-from pandas.core.sparse.dtype import SparseDtype
+
+# ----------------------------------------------------------------------------
+# Dtype
+
+@register_extension_dtype
+class SparseDtype(ExtensionDtype):
+    """
+    Dtype for data stored in :class:`SparseArray`.
+
+    This dtype implements the pandas ExtensionDtype interface.
+
+    .. versionadded:: 0.24.0
+
+    Parameters
+    ----------
+    dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64
+        The dtype of the underlying array storing the non-fill-value elements.
+    fill_value : scalar, optional
+        The scalar value not stored in the SparseArray. By default, this
+        depends on `dtype`.
+
+        =========== ==========
+        dtype       na_value
+        =========== ==========
+        float       ``np.nan``
+        int         ``0``
+        bool        ``False``
+        datetime64  ``pd.NaT``
+        timedelta64 ``pd.NaT``
+        =========== ==========
+
+        The default value may be overridden by specifying a `fill_value`.
+    """
+    # We include `_is_na_fill_value` in the metadata to avoid hash collisions
+    # between SparseDtype(float, 0.0) and SparseDtype(float, nan).
+    # Without `_is_na_fill_value` in the hashed metadata, those two could hash
+    # the same, since hash(nan) can be 0, which is also hash(0.0).
+    _metadata = ('_dtype', '_fill_value', '_is_na_fill_value')
+
+    def __init__(self, dtype=np.float64, fill_value=None):
+        # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None
+        from pandas.core.dtypes.missing import na_value_for_dtype
+        from pandas.core.dtypes.common import (
+            pandas_dtype, is_string_dtype, is_scalar
+        )
+
+        if isinstance(dtype, type(self)):
+            if fill_value is None:
+                fill_value = dtype.fill_value
+            dtype = dtype.subtype
+
+        dtype = pandas_dtype(dtype)
+        if is_string_dtype(dtype):
+            dtype = np.dtype('object')
+
+        if fill_value is None:
+            fill_value = na_value_for_dtype(dtype)
+
+        if not is_scalar(fill_value):
+            raise ValueError("fill_value must be a scalar. Got {} "
+                             "instead".format(fill_value))
+        self._dtype = dtype
+        self._fill_value = fill_value
+
+    def __hash__(self):
+        # Python3 doesn't inherit __hash__ when a base class overrides
+        # __eq__, so we explicitly do it here.
+        return super(SparseDtype, self).__hash__()
+
+    def __eq__(self, other):
+        # We have to override __eq__ to handle NA values in _metadata.
+        # The base class does simple == checks, which fail for NA.
+        if isinstance(other, compat.string_types):
+            try:
+                other = self.construct_from_string(other)
+            except TypeError:
+                return False
+
+        if isinstance(other, type(self)):
+            subtype = self.subtype == other.subtype
+            if self._is_na_fill_value:
+                # NaN != NaN, so NA fill values are matched by type rather
+                # than by ==. `other` must be NA too; the inner parentheses
+                # are needed because `and` binds tighter than `or`.
+                # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan)
+                # SparseDtype(float, np.nan)     != SparseDtype(float, pd.NaT)
+                fill_value = (
+                    other._is_na_fill_value and
+                    (isinstance(self.fill_value, type(other.fill_value)) or
+                     isinstance(other.fill_value, type(self.fill_value)))
+                )
+            else:
+                fill_value = self.fill_value == other.fill_value
+
+            return subtype and fill_value
+        return False
+
+    @property
+    def fill_value(self):
+        """
+        The fill value of the array.
+
+        Converting the SparseArray to a dense ndarray will fill the
+        array with this value.
+
+        .. warning::
+
+           It's possible to end up with a SparseArray that has ``fill_value``
+           values in ``sp_values``. This can occur, for example, when setting
+           ``SparseArray.fill_value`` directly.
+        """
+        return self._fill_value
+
+    @property
+    def _is_na_fill_value(self):
+        from pandas.core.dtypes.missing import isna
+        return isna(self.fill_value)
+
+    @property
+    def _is_numeric(self):
+        from pandas.core.dtypes.common import is_object_dtype
+        return not is_object_dtype(self.subtype)
+
+    @property
+    def _is_boolean(self):
+        from pandas.core.dtypes.common import is_bool_dtype
+        return is_bool_dtype(self.subtype)
+
+    @property
+    def kind(self):
+        return self.subtype.kind
+
+    @property
+    def type(self):
+        return self.subtype.type
+
+    @property
+    def subtype(self):
+        return self._dtype
+
+    @property
+    def name(self):
+        return 'Sparse[{}, {}]'.format(self.subtype.name, self.fill_value)
+
+    def __repr__(self):
+        return self.name
+
+    @classmethod
+    def construct_array_type(cls):
+        return SparseArray
+
+    @classmethod
+    def construct_from_string(cls, string):
+        """
+        Construct a SparseDtype from a string form.
+
+        Parameters
+        ----------
+        string : str
+            Can take the following forms.
+
+            ================ ============================
+            string           dtype
+            ================ ============================
+            'Sparse'         SparseDtype[np.float64, nan]
+            'Sparse[int]'    SparseDtype[np.int64, 0]
+            'Sparse[int, 0]' SparseDtype[np.int64, 0]
+            ================ ============================
+
+            It is not possible to specify non-default fill values
+            with a string. An argument like ``'Sparse[int, 1]'``
+            will raise a ``TypeError`` because the default fill value
+            for integers is 0.
+
+        Returns
+        -------
+        SparseDtype
+        """
+        msg = "Could not construct SparseDtype from '{}'".format(string)
+        if string.startswith("Sparse"):
+            try:
+                sub_type, has_fill_value = cls._parse_subtype(string)
+                result = SparseDtype(sub_type)
+            except Exception:
+                raise TypeError(msg)
+            else:
+                msg = ("Could not construct SparseDtype from '{}'.\n\nIt "
+                       "looks like the fill_value in the string is not "
+                       "the default for the dtype. Non-default fill_values "
+                       "are not supported. Use the 'SparseDtype()' "
+                       "constructor instead.")
+                if has_fill_value and str(result) != string:
+                    raise TypeError(msg.format(string))
+                return result
+        else:
+            raise TypeError(msg)
+
+    @staticmethod
+    def _parse_subtype(dtype):
+        """
+        Parse a string to get the subtype
+
+        Parameters
+        ----------
+        dtype : str
+            A string like
+
+            * Sparse[subtype]
+            * Sparse[subtype, fill_value]
+
+        Returns
+        -------
+        (subtype, has_fill_value) : tuple of (str, str or False)
+
+        Raises
+        ------
+        ValueError
+            When the subtype cannot be extracted.
+        """
+        xpr = re.compile(
+            r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$"
+        )
+        m = xpr.match(dtype)
+        has_fill_value = False
+        if m:
+            subtype = m.groupdict()['subtype']
+            has_fill_value = m.groupdict()['fill_value'] or has_fill_value
+        elif dtype == "Sparse":
+            subtype = 'float64'
+        else:
+            raise ValueError("Cannot parse {}".format(dtype))
+        return subtype, has_fill_value
+
+    @classmethod
+    def is_dtype(cls, dtype):
+        dtype = getattr(dtype, 'dtype', dtype)
+        if (isinstance(dtype, compat.string_types) and
+                dtype.startswith("Sparse")):
+            sub_type, _ = cls._parse_subtype(dtype)
+            dtype = np.dtype(sub_type)
+        elif isinstance(dtype, cls):
+            return True
+        return isinstance(dtype, np.dtype) or dtype == 'Sparse'
+
+# ----------------------------------------------------------------------------
+# Array
 
 
 _sparray_doc_kwargs = dict(klass='SparseArray')