pandas-dev · jorisvandenbossche · Jan 7, 2020 · Dec 18, 2019 · Dec 21, 2019 · Dec 21, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -197,6 +197,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`)
+- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`)
 - :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`)
 - :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
 - The :ref:`integer dtype <integer_na>` with support for missing values and the
@@ -725,7 +726,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
 - Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`)
 - Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`)
 - Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`)
-- Removed :meth:`Series.nonzero`, use `to_numpy().nonzero()` instead (:issue:`24048`)
+- Removed :meth:`Series.nonzero`, use ``to_numpy().nonzero()`` instead (:issue:`24048`)
 - Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`)
 - Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`)
 - Removed :meth:`Series.put` (:issue:`27106`)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -10,6 +10,7 @@
 
 import numpy as np
 
+from pandas._libs import lib
 from pandas._typing import ArrayLike
 from pandas.compat import set_function_name
 from pandas.compat.numpy import function as nv
@@ -350,6 +351,39 @@ def __iter__(self):
         for i in range(len(self)):
             yield self[i]
 
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
+        """
+        Convert to a NumPy ndarray.
+
+        .. versionadded:: 1.0.0
+
+        This is similar to :func:`numpy.asarray`, but may provide additional control
+        over how the conversion is done.
+
+        Parameters
+        ----------
+        dtype : str or numpy.dtype, optional
+            The dtype to pass to :func:`numpy.asarray`.
+        copy : bool, default False
+            Whether to ensure that the returned value is not a view on
+            another array. Note that ``copy=False`` does not *ensure* that
+            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
+            a copy is made, even if not strictly necessary.
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+        Returns
+        -------
+        numpy.ndarray
+        """
+        result = np.asarray(self, dtype=dtype)
+        if copy or na_value is not lib._no_default:
+            result = result.copy()
+        if na_value is not lib._no_default:
+            result[self.isna()] = na_value
+        return result
+
     # ------------------------------------------------------------------------
     # Required attributes
     # ------------------------------------------------------------------------

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -327,29 +327,81 @@ def __getitem__(self, item):
 
         return type(self)(self._data[item], self._mask[item])
 
-    def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA):
+    def to_numpy(
+        self, dtype=None, copy=False, na_value: "Scalar" = lib._no_default, **kwargs
+    ):
         """
-        Coerce to an ndarray of object dtype or bool dtype (if force_bool=True).
+        Convert to a NumPy array.
+
+        By default converts to an object-dtype NumPy array. Specify the `dtype` and
+        `na_value` keywords to customize the conversion.
 
         Parameters
         ----------
         dtype : dtype, default object
-            The numpy dtype to convert to
+            The numpy dtype to convert to.
+        copy : bool, default False
+            Whether to ensure that the returned value is not a view on
+            the array. Note that ``copy=False`` does not *ensure* that
+            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
+            a copy is made, even if not strictly necessary. A no-copy
+            result is typically only possible when no missing values are
+            present and `dtype` is a boolean dtype.
         na_value : scalar, optional
              Scalar missing value indicator to use in numpy array. Defaults
              to the native missing value indicator of this array (pd.NA).
+
+        Returns
+        -------
+        numpy.ndarray
+
+        Examples
+        --------
+        An object-dtype is the default result
+
+        >>> a = pd.array([True, False], dtype="boolean")
+        >>> a.to_numpy()
+        array([True, False], dtype=object)
+
+        When no missing values are present, a boolean dtype can be used.
+
+        >>> a.to_numpy(dtype="bool")
+        array([ True, False])
+
+        However, requesting a bool dtype will raise a ValueError if
+        missing values are present and the default missing value :attr:`NA`
+        is used.
+
+        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
+        >>> a
+        <BooleanArray>
+        [True, False, NA]
+        Length: 3, dtype: boolean
+
+        >>> a.to_numpy(dtype="bool")
+        Traceback (most recent call last):
+        ...
+        ValueError: cannot convert to bool numpy array in presence of missing values
+
+        Specify a valid `na_value` instead
+
+        >>> a.to_numpy(dtype="bool", na_value=False)
+        array([ True, False, False])
         """
+        if na_value is lib._no_default:
+            na_value = libmissing.NA
         if dtype is None:
             dtype = object
-        if is_bool_dtype(dtype):
-            if not self._hasna:
-                return self._data
-            else:
+        if self._hasna:
+            if is_bool_dtype(dtype) and na_value is libmissing.NA:
                 raise ValueError(
                     "cannot convert to bool numpy array in presence of missing values"
                 )
-        data = self._data.astype(dtype)
-        data[self._mask] = na_value
+            # don't pass copy to astype -> always need a copy since we are mutating
+            data = self._data.astype(dtype)
+            data[self._mask] = na_value
+        else:
+            data = self._data.astype(dtype, copy=copy)
         return data
 
     __array_priority__ = 1000  # higher than ndarray so ops dispatch to us
@@ -360,7 +412,7 @@ def __array__(self, dtype=None):
         We return an object array here to preserve our scalar values
         """
         # by default (no dtype specified), return an object array
-        return self._coerce_to_ndarray(dtype=dtype)
+        return self.to_numpy(dtype=dtype)
 
     def __arrow_array__(self, type=None):
         """
@@ -536,7 +588,7 @@ def astype(self, dtype, copy=True):
         if is_float_dtype(dtype):
             na_value = np.nan
         # coerce
-        data = self._coerce_to_ndarray(na_value=na_value)
+        data = self.to_numpy(na_value=na_value)
         return astype_nansafe(data, dtype, copy=False)
 
     def value_counts(self, dropna=True):

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -421,7 +421,7 @@ def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
 
     # ------------------------------------------------------------------------
     # Additional Methods
-    def to_numpy(self, dtype=None, copy=False):
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
         """
         Convert the PandasArray to a :class:`numpy.ndarray`.
 
@@ -433,15 +433,21 @@ def to_numpy(self, dtype=None, copy=False):
             The NumPy dtype to pass to :func:`numpy.asarray`.
         copy : bool, default False
             Whether to copy the underlying data.
+        na_value : Scalar, optional
+            The value to use for missing values.
 
         Returns
         -------
         ndarray
         """
         result = np.asarray(self._ndarray, dtype=dtype)
-        if copy and result is self._ndarray:
+
+        if (copy or na_value is not lib._no_default) and result is self._ndarray:
             result = result.copy()
 
+        if na_value is not lib._no_default:
+            result[self.isna()] = na_value
+
         return result
 
     @Appender(ExtensionArray.searchsorted.__doc__)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -19,7 +19,6 @@
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_datetime64_ns_dtype,
-    is_datetime64tz_dtype,
     is_dict_like,
     is_extension_array_dtype,
     is_list_like,
@@ -785,7 +784,7 @@ def array(self) -> ExtensionArray:
 
         return result
 
-    def to_numpy(self, dtype=None, copy=False):
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default, **kwargs):
         """
         A NumPy ndarray representing the values in this Series or Index.
 
@@ -800,6 +799,17 @@ def to_numpy(self, dtype=None, copy=False):
             another array. Note that ``copy=False`` does not *ensure* that
             ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
             a copy is made, even if not strictly necessary.
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+            .. versionadded:: 1.0.0
+
+        **kwargs
+            Additional keywords passed through to the ``to_numpy`` method
+            of the underlying array (for extension arrays).
+
+            .. versionadded:: 1.0.0
 
         Returns
         -------
@@ -869,16 +879,21 @@ def to_numpy(self, dtype=None, copy=False):
         array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
               dtype='datetime64[ns]')
         """
-        if is_datetime64tz_dtype(self.dtype) and dtype is None:
-            # note: this is going to change very soon.
-            # I have a WIP PR making this unnecessary, but it's
-            # a bit out of scope for the DatetimeArray PR.
-            dtype = "object"
+        if is_extension_array_dtype(self.dtype):
+            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
+        else:
+            if kwargs:
+                msg = "to_numpy() got an unexpected keyword argument '{}'".format(
+                    list(kwargs.keys())[0]
+                )
+                raise TypeError(msg)
 
         result = np.asarray(self._values, dtype=dtype)
         # TODO(GH-24345): Avoid potential double copy
-        if copy:
+        if copy or na_value is not lib._no_default:
             result = result.copy()
+            if na_value is not lib._no_default:
+                result[self.isna()] = na_value
         return result
 
     @property

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1242,7 +1242,7 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None):
 
         return cls(data, index=index, columns=columns, dtype=dtype)
 
-    def to_numpy(self, dtype=None, copy=False):
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
         """
         Convert the DataFrame to a NumPy array.
 
@@ -1264,6 +1264,12 @@ def to_numpy(self, dtype=None, copy=False):
             ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
             a copy is made, even if not strictly necessary.
 
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+            .. versionadded:: 1.0.0
+
         Returns
         -------
         numpy.ndarray
@@ -1295,6 +1301,13 @@ def to_numpy(self, dtype=None, copy=False):
                [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)
         """
         result = np.array(self.values, dtype=dtype, copy=copy)
+        if na_value is not lib._no_default:
+            if not copy:
+                # copy even if not requested. This may be unnecessary
+                # if NumPy already copied.
+                result = result.copy()
+
+            result[self.isna()] = na_value
         return result
 
     def to_dict(self, orient="dict", into=dict):

diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py
@@ -251,6 +251,70 @@ def test_coerce_to_numpy_array():
         np.array(arr, dtype="bool")
 
 
+@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
+def test_to_numpy(box):
+    con = pd.Series if box else pd.array
+    # default (with or without missing values) -> object dtype
+    arr = con([True, False, True], dtype="boolean")
+    result = arr.to_numpy()
+    expected = np.array([True, False, True], dtype="object")
+    tm.assert_numpy_array_equal(result, expected)
+
+    arr = con([True, False, None], dtype="boolean")
+    result = arr.to_numpy()
+    expected = np.array([True, False, pd.NA], dtype="object")
+    tm.assert_numpy_array_equal(result, expected)
+
+    # no missing values -> can convert to bool, otherwise raises
+    arr = con([True, False, True], dtype="boolean")
+    result = arr.to_numpy(dtype="bool")
+    expected = np.array([True, False, True], dtype="bool")
+    tm.assert_numpy_array_equal(result, expected)
+
+    arr = con([True, False, None], dtype="boolean")
+    with pytest.raises(ValueError, match="cannot convert to bool numpy"):
+        result = arr.to_numpy(dtype="bool")
+
+    # specify dtype and na_value
+    arr = con([True, False, None], dtype="boolean")
+    result = arr.to_numpy(dtype=object, na_value=None)
+    expected = np.array([True, False, None], dtype="object")
+    tm.assert_numpy_array_equal(result, expected)
+
+    result = arr.to_numpy(dtype=bool, na_value=False)
+    expected = np.array([True, False, False], dtype="bool")
+    tm.assert_numpy_array_equal(result, expected)
+
+    result = arr.to_numpy(dtype="int64", na_value=-99)
+    expected = np.array([1, 0, -99], dtype="int64")
+    tm.assert_numpy_array_equal(result, expected)
+
+    result = arr.to_numpy(dtype="float64", na_value=np.nan)
+    expected = np.array([1, 0, np.nan], dtype="float64")
+    tm.assert_numpy_array_equal(result, expected)
+
+    # converting to int or float without specifying na_value raises
+    with pytest.raises(TypeError):
+        arr.to_numpy(dtype="int64")
+    with pytest.raises(TypeError):
+        arr.to_numpy(dtype="float64")
+
+
+def test_to_numpy_copy():
+    # to_numpy can be zero-copy if no missing values
+    arr = pd.array([True, False, True], dtype="boolean")
+    result = arr.to_numpy(dtype=bool)
+    result[0] = False
+    tm.assert_extension_array_equal(
+        arr, pd.array([False, False, True], dtype="boolean")
+    )
+
+    arr = pd.array([True, False, True], dtype="boolean")
+    result = arr.to_numpy(dtype=bool, copy=True)
+    result[0] = False
+    tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))
+
+
 def test_astype():
     # with missing values
     arr = pd.array([True, False, None], dtype="boolean")