pandas-dev · jreback · Nov 29, 2018 · Oct 30, 2018 · Nov 6, 2018 · Nov 11, 2018
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst
@@ -137,7 +137,42 @@ However, operations such as slicing will also slice the index.
     s[[4, 3, 1]]
     np.exp(s)
 
-We will address array-based indexing in a separate :ref:`section <indexing>`.
+.. note::
+
+   We will address array-based indexing like ``s[[4, 3, 1]]``
+   in :ref:`section <indexing>`.
+
+Like a NumPy array, a pandas Series as a :attr:`Series.dtype`.
+
+.. ipython:: python
+
+   s.dtype
+
+This is often a NumPy dtype. However, pandas and 3rd-party libraries
+extend NumPy's type system in a few places, in which case the dtype would
+be a :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within
+pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`dsintro.data_type` for more.
+
+If you need the actual array backing a ``Series``, use :attr:`Series.array`.
+
+.. ipython:: python
+
+   s.array
+
+Again, this is often a NumPy array, but may instead be a
+:class:`~pandas.api.extensions.ExtensionArray`. See :ref:`dsintro.data_type` for more.
+Accessing the array can be useful when you need to do some operation without the
+index (to disable :ref:`automatic alignment <dsintro.alignment>`, for example).
+
+While Series is ndarray-like, if you need an *actual* ndarray, then use
+:meth:`Series.to_numpy`.
+
+.. ipython:: python
+
+   s.to_numpy()
+
+Even if the Series is backed by a :class:`~pandas.api.extensions.ExtensionArray`,
+:meth:`Series.to_numpy` will return a NumPy ndarray.
 
 Series is dict-like
 ~~~~~~~~~~~~~~~~~~~
@@ -617,6 +652,8 @@ slicing, see the :ref:`section on indexing <indexing>`. We will address the
 fundamentals of reindexing / conforming to new sets of labels in the
 :ref:`section on reindexing <basics.reindexing>`.
 
+.. _dsintro.alignment:
+
 Data alignment and arithmetic
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -21,6 +21,46 @@ the user to override the engine's default behavior to include or omit the
 dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
 
+.. _whatsnew_0240.values_api:
+
+Accessing the values in a Series or Index
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:attr:`Series.array` and :attr:`Index.array` have been added for extracting the array backing a
+``Series`` or ``Index``.
+
+.. ipython:: python
+
+   idx = pd.period_range('2000', periods=4)
+   idx.array
+   pd.Series(idx).array
+
+Historically, this would have been done with ``series.values``, but with
+``.values`` it was unclear whether the returned value would be the actual array,
+some transformation of it, or one of pandas custom arrays (like
+``Categorical``). For example, with :class:`PeriodIndex`, ``.values`` generates
+a new ndarray of period objects each time.
+
+.. ipython:: python
+
+   id(idx.values)
+   id(idx.values)
+
+If you need an actual NumPy array, use :meth:`Series.to_numpy` or :meth:`Index.to_numpy`.
+
+.. ipython:: python
+
+   idx.to_numpy()
+   pd.Series(idx).to_numpy()
+
+For Series and Indexes backed by normal NumPy arrays, this will be the same thing (and the same
+as ``.values``).
+
+.. ipython:: python
+
+   ser = pd.Series([1, 2, 3])
+   ser.array
+   ser.to_numpy()
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -765,6 +765,97 @@ def base(self):
                       FutureWarning, stacklevel=2)
         return self.values.base
 
+    @property
+    def array(self):
+        # type: () -> Union[np.ndarray, ExtensionArray]
+        """The actual Array backing this Series or Index.
+
+        Returns
+        -------
+        Union[ndarray, ExtensionArray]
+            This is the actual array stored within this object.
+
+        Notes
+        -----
+        This table lays out the different array types for each extension
+        dtype within pandas.
+
+        ================== =============================
+        dtype              array type
+        ================== =============================
+        category           Categorical
+        period             PeriodArray
+        interval           IntervalArray
+        IntegerNA          IntegerArray
+        datetime64[ns, tz] datetime64[ns]? DatetimeArray
+        ================== =============================
+
+        For any 3rd-party extension types, the array type will be an
+        ExtensionArray.
+
+        All remaining arrays (ndarrays), ``.array`` will be the ndarray
+        stored within.
+
+        See Also
+        --------
+        to_numpy : Similar method that always returns a NumPy array.
+
+        Examples
+        --------
+        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
+        >>> ser.array
+        [a, b, a]
+        Categories (2, object): [a, b]
+        """
+        return self._values
+
+    def to_numpy(self):
+        """A NumPy array representing the values in this Series or Index.
+
+        The returned array will be the same up to equality (values equal
+        in `self` will be equal in the returned array; likewise for values
+        that are not equal).
+
+        Returns
+        -------
+        numpy.ndarray
+            An ndarray with
+
+        Notes
+        -----
+        For NumPy arrays, this will be a reference to the actual data stored
+        in this Series or Index.
+
+        For extension types, this may involve copying data and coercing the
+        result to a NumPy type (possibly object), which may be expensive.
+
+        This table lays out the different array types for each extension
+        dtype within pandas.
+
+        ================== ================================
+        dtype              array type
+        ================== ================================
+        category[T]        ndarray[T] (same dtype as input)
+        period             ndarray[object] (Periods)
+        interval           ndarray[object] (Intervals)
+        IntegerNA          IntegerArray[object]
+        datetime64[ns, tz] datetime64[ns]? object?
+        ================== ================================
+
+        See Also
+        --------
+        array : Get the actual data stored within.
+
+        Examples
+        --------
+        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
+        >>> ser.to_numpy()
+        array(['a', 'b', 'a'], dtype=object)
+        """
+        if is_extension_array_dtype(self.dtype):
+            return np.asarray(self._values)
+        return self._values
+
     @property
     def _ndarray_values(self):
         # type: () -> np.ndarray

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -735,7 +735,7 @@ def _values(self):
         values
         _ndarray_values
         """
-        return self.values
+        return self._data
 
     def get_values(self):
         """

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -288,6 +288,26 @@ def _verify_integrity(self, labels=None, levels=None):
     def levels(self):
         return self._levels
 
+    @property
+    def _values(self):
+        # TODO: remove
+        # We override here, since our parent uses _data, which we dont' use.
+        return self.values
+
+    @property
+    def array(self):
+        """
+        Raises a ValueError for `MultiIndex` because there's no single
+        array backing a MultiIndex.
+
+        Raises
+        ------
+        ValueError
+        """
+        msg = ("MultiIndex has no single backing array. Use "
+               "'MultiIndex.to_numpy()' to get a NumPy array of tuples.")
+        raise ValueError(msg)
+
     @property
     def _is_homogeneous_type(self):
         """Whether the levels of a MultiIndex all have the same dtype.

diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
@@ -1269,3 +1269,54 @@ def test_ndarray_values(array, expected):
     r_values = pd.Index(array)._ndarray_values
     tm.assert_numpy_array_equal(l_values, r_values)
     tm.assert_numpy_array_equal(l_values, expected)
+
+
+@pytest.mark.parametrize("array, attr", [
+    (np.array([1, 2], dtype=np.int64), None),
+    (pd.Categorical(['a', 'b']), '_codes'),
+    (pd.core.arrays.period_array(['2000', '2001'], freq='D'), '_data'),
+    (pd.core.arrays.integer_array([0, np.nan]), '_data'),
+    (pd.core.arrays.IntervalArray.from_breaks([0, 1]), '_left'),
+    (pd.SparseArray([0, 1]), '_sparse_values'),
+    # TODO: DatetimeArray(add)
+])
+@pytest.mark.parametrize('box', [pd.Series, pd.Index])
+def test_array(array, attr, box):
+    if array.dtype.name in ('Int64', 'Sparse[int64, 0]'):
+        pytest.skip("No index type for {}".format(array.dtype))
+    result = box(array, copy=False).array
+
+    if attr:
+        array = getattr(array, attr)
+        result = getattr(result, attr)
+
+    assert result is array
+
+
+def test_array_multiindex_raises():
+    idx = pd.MultiIndex.from_product([['A'], ['a', 'b']])
+    with pytest.raises(ValueError, match='MultiIndex'):
+        idx.array
+
+
+@pytest.mark.parametrize('array, expected', [
+    (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
+    (pd.Categorical(['a', 'b']), np.array(['a', 'b'], dtype=object)),
+    (pd.core.arrays.period_array(['2000', '2001'], freq='D'),
+     np.array([pd.Period('2000', freq="D"), pd.Period('2001', freq='D')])),
+    (pd.core.arrays.integer_array([0, np.nan]),
+     np.array([1, np.nan], dtype=object)),
+    (pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]),
+     np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)),
+    (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
+    # TODO: DatetimeArray(add)
+])
+@pytest.mark.parametrize('box', [pd.Series, pd.Index])
+def test_to_numpy(array, expected, box):
+    thing = box(array)
+
+    if array.dtype.name in ('Int64', 'Sparse[int64, 0]'):
+        pytest.skip("No index type for {}".format(array.dtype))
+
+    result = thing.to_numpy()
+    tm.assert_numpy_array_equal(result, expected)