pandas-dev · jreback · Dec 28, 2018 · Nov 8, 2018 · Nov 9, 2018 · Nov 9, 2018
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -702,6 +702,19 @@ strings and apply several methods to it. These can be accessed like
        Series.dt
        Index.str
 
+
+.. _api.arrays:
+
+Arrays
+------
+
+Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
+
+.. autosummary::
+   :toctree: generated/
+
+   array
+
 .. _api.categorical:
 
 Categorical
@@ -790,6 +803,65 @@ following usable methods and properties:
    Series.cat.as_ordered
    Series.cat.as_unordered
 
+.. _api.arrays.integerna:
+
+Integer-NA
+~~~~~~~~~~
+
+:class:`arrays.IntegerArray` can hold integer data, potentially with missing
+values.
+
+.. autosummary::
+   :toctree: generated/
+
+   arrays.IntegerArray
+
+.. _api.arrays.interval:
+
+Interval
+~~~~~~~~
+
+:class:`IntervalArray` is an array for storing data representing intervals.
+The scalar type is a :class:`Interval`. These may be stored in a :class:`Series`
+or as a :class:`IntervalIndex`. :class:`IntervalArray` can be closed on the
+``'left'``, ``'right'``, ``'both'``, or ``'neither'`` sides.
+See :ref:`indexing.intervallindex` for more.
+
+.. currentmodule:: pandas
+
+.. autosummary::
+   :toctree: generated/
+
+   IntervalArray
+
+.. _api.arrays.period:
+
+Period
+~~~~~~
+
+Periods represent a span of time (e.g. the year 2000, or the hour from 11:00 to 12:00
+on January 1st, 2000). A collection of :class:`Period` objects with a common frequency
+can be stored in a :class:`arrays.PeriodArray`. See :ref:`timeseries.periods` for more.
+
+.. autosummary::
+   :toctree: generated/
+
+   arrays.PeriodArray
+
+.. _api.arrays.sparse:
+
+Sparse
+~~~~~~
+
+Sparse data may be stored and operated on more efficiently when there is a single value
+that's often repeated. :class:`SparseArray` is a container for this type of data.
+See :ref:`sparse` for more.
+
+.. autosummary::
+   :toctree: generated/
+
+   SparseArray
+
 Plotting
 ~~~~~~~~
 
@@ -1676,6 +1748,7 @@ IntervalIndex Components
    IntervalIndex.get_indexer
    IntervalIndex.set_closed
    IntervalIndex.overlaps
+   IntervalIndex.to_tuples
 
 
 .. _api.multiindex:
@@ -1907,6 +1980,8 @@ Methods
     PeriodIndex.strftime
     PeriodIndex.to_timestamp
 
+.. _api.scalars:
+
 Scalars
 -------
 

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -152,6 +152,28 @@ Reduction and groupby operations such as 'sum' work.
 
    The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
 
+.. _whatsnew_0240.enhancements.array:
+
+A new top-level method :func:`array` has been added for creating arrays (:issue:`22860`).
+This can be used to create any :ref:`extension array <extending.extension-types>`, including
+extension arrays registered by :ref:`3rd party libraries <ecosystem.extensions>`, or to
+create NumPy arrays.
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan], dtype='Int64')
+   pd.array(['a', 'b', 'c'], dtype='category')
+   pd.array([1, 2])
+
+Notice that if no ``dtype`` is specified, the type of array is inferred from
+the data. In particular, note that passing ``[1, 2, np.nan]`` without a
+``dtype`` will return a floating-point NumPy array, since ``NaN``
+is a float.
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan])
+
 .. _whatsnew_0240.enhancements.read_html:
 
 ``read_html`` Enhancements

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -49,6 +49,7 @@
 from pandas.io.api import *
 from pandas.util._tester import test
 import pandas.testing
+import pandas.arrays
 
 # use the closest tagged version if possible
 from ._version import get_versions

diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py
@@ -0,0 +1,17 @@
+"""
+All of pandas' ExtensionArrays.
+
+See :ref:`extending.extension-types` for more.
+"""
+from pandas.core.arrays import (
+    IntervalArray, PeriodArray, Categorical, SparseArray, IntegerArray,
+)
+
+
+__all__ = [
+    'Categorical',
+    'IntegerArray',
+    'IntervalArray',
+    'PeriodArray',
+    'SparseArray',
+]
diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -4,9 +4,26 @@
 
 import numpy as np
 
+from pandas.core.arrays import IntervalArray
+from pandas.core.arrays.integer import (
+    Int8Dtype,
+    Int16Dtype,
+    Int32Dtype,
+    Int64Dtype,
+    UInt8Dtype,
+    UInt16Dtype,
+    UInt32Dtype,
+    UInt64Dtype,
+)
 from pandas.core.algorithms import factorize, unique, value_counts
 from pandas.core.dtypes.missing import isna, isnull, notna, notnull
-from pandas.core.arrays import Categorical
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    PeriodDtype,
+    IntervalDtype,
+    DatetimeTZDtype,
+)
+from pandas.core.arrays import Categorical, array
 from pandas.core.groupby import Grouper
 from pandas.io.formats.format import set_eng_float_format
 from pandas.core.index import (Index, CategoricalIndex, Int64Index,

diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -1,3 +1,4 @@
+from .array_ import array  # noqa
 from .base import (ExtensionArray,    # noqa
                    ExtensionOpsMixin,
                    ExtensionScalarOpsMixin)

diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py
@@ -0,0 +1,184 @@
+import numpy as np
+
+from pandas._libs import lib, tslibs
+
+from pandas.core.dtypes.common import is_extension_array_dtype
+from pandas.core.dtypes.dtypes import registry
+from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
+
+from pandas import compat
+
+
+def array(data,         # type: Sequence[object]
+          dtype=None,   # type: Optional[Union[str, np.dtype, ExtensionDtype]]
+          copy=True,    # type: bool
+          ):
+    # type: (...) -> Union[np.ndarray, ExtensionArray]
+    """
+    Create an array.
+
+    .. versionadded:: 0.24.0
+
+    Parameters
+    ----------
+    data : Sequence of objects
+        The scalars inside `data` should be instances of the
+        scalar type for `dtype`.
+
+        When `data` is an Index or Series, the underlying array
+        will be extracted from `data`.
+
+    dtype : str, np.dtype, or ExtensionDtype, optional
+        The dtype to use for the array. This may be a NumPy
+        dtype or an extension type registered with pandas using
+        :meth:`pandas.api.extensions.register_extension_dtype`.
+
+        If not specified, there are two possibilities:
+
+        1. When `data` is a :class:`Series`, :class:`Index`, or
+           :class:`ExtensionArray`, the `dtype` will be taken
+           from the data.
+        2. Otherwise, pandas will attempt to infer the `dtype`
+           from the data.
+
+        Note that when `data` is a NumPy array, ``data.dtype`` is
+        *not* used for inferring the array type. This is because
+        NumPy cannot represent all the types of data that can be
+        held in extension arrays.
+
+        Currently, pandas will infer an extension dtype for sequences of
+
+        ========================== ==================================
+        Scalar Type                Array Type
+        ========================== ==================================
+        :class:`pandas.Interval`   :class:`pandas.IntervalArray`
+        :class:`pandas.Period`     :class:`pandas.arrays.PeriodArray`
+        ========================== ==================================
+
+        For all other cases, NumPy's usual inference rules will be used.
+
+        To avoid *future* breaking changes, pandas recommends using actual
+        dtypes, and not string aliases, for `dtype`. In other words, use
+
+        >>> pd.array([1, 2, 3], dtype=np.dtype("int32"))
+        array([1, 2, 3], dtype=int32)
+
+        rather than
+
+        >>> pd.array([1, 2, 3], dtype="int32")
+        array([1, 2, 3], dtype=int32)
+
+        If and when pandas switches to a different backend for storing arrays,
+        the meaning of the string aliases will change, while the actual
+        dtypes will be unambiguous.
+
+    copy : bool, default True
+        Whether to copy the data, even if not necessary. Depending
+        on the type of `data`, creating the new array may require
+        copying data, even if ``copy=False``.
+
+    Returns
+    -------
+    array : Union[numpy.ndarray, ExtensionArray]
+
+    See Also
+    --------
+    numpy.array : Construct a NumPy array.
+    Series : Construct a pandas Series.
+
+    Notes
+    -----
+    Omitting the `dtype` argument means pandas will attempt to infer the
+    best array type from the values in the data. As new array types are
+    added by pandas and 3rd party libraries, the "best" array type may
+    change. We recommend specifying `dtype` to ensure that
+
+    1. the correct array type for the data is returned
+    2. the returned array type doesn't change as new extension types
+       are added by pandas and third-party libraries
+
+    Examples
+    --------
+    If a dtype is not specified, `data` is passed through to
+    :func:`numpy.array`, and an ``ndarray`` is returned.
+
+    >>> pd.array([1, 2])
+    array([1, 2])
+
+    Or the NumPy dtype can be specified
+
+    >>> pd.array([1, 2], dtype=np.dtype("int32"))
+    array([1, 2], dtype=int32)
+
+    You can use the string alias for `dtype`
+
+    >>> pd.array(['a', 'b', 'a'], dtype='category')
+    [a, b, a]
+    Categories (2, object): [a, b]
+
+    Or specify the actual dtype
+
+    >>> pd.array(['a', 'b', 'a'],
+    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
+    [a, b, a]
+    Categories (3, object): [a < b < c]
+
+    Because omitting the `dtype` passes the data through to NumPy,
+    a mixture of valid integers and NA will return a floating-point
+    NumPy array.
+
+    >>> pd.array([1, 2, np.nan])
+    array([ 1.,  2., nan])
+
+    To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify
+    the dtype:
+
+    >>> pd.array([1, 2, np.nan], dtype='Int64')
+    IntegerArray([1, 2, nan], dtype='Int64')
+
+    Pandas will infer an ExtensionArray for some types of data:
+
+    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
+    <PeriodArray>
+    ['2000-01-01', '2000-01-01']
+    Length: 2, dtype: period[D]
+    """
+    from pandas.core.arrays import (
+        period_array, ExtensionArray, IntervalArray
+    )
+
+    if isinstance(data, (ABCSeries, ABCIndexClass)):
+        data = data._values
+
+    if dtype is None and isinstance(data, ExtensionArray):
+        dtype = data.dtype
+
+    # registry.find returns None for not-found dtypes; keep the string then.
+    if isinstance(dtype, compat.string_types):
+        dtype = registry.find(dtype) or dtype
+
+    if is_extension_array_dtype(dtype):
+        cls = dtype.construct_array_type()
+        return cls._from_sequence(data, dtype=dtype, copy=copy)
+
+    if dtype is None:
+        inferred_dtype = lib.infer_dtype(data)
+        if inferred_dtype == 'period':
+            try:
+                return period_array(data, copy=copy)
+            except tslibs.IncompatibleFrequency:
+                # We may have a mixture of frequencies.
+                # We choose to return an ndarray, rather than raising.
+                pass
+        elif inferred_dtype == 'interval':
+            try:
+                return IntervalArray(data, copy=copy)
+            except ValueError:
+                # We may have a mixture of `closed` here.
+                # We choose to return an ndarray, rather than raising.
+                pass
+
+        # TODO(DatetimeArray): handle this type
+        # TODO(BooleanArray): handle this type
+
+    return np.array(data, dtype=dtype, copy=copy)
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -81,7 +81,9 @@
 from_arrays
 from_tuples
 from_breaks
+overlaps
 set_closed
+to_tuples
 %(extra_methods)s\
 
 See Also