pandas-dev · jorisvandenbossche · Jan 7, 2020 · Dec 18, 2019 · Dec 21, 2019 · Dec 21, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -197,6 +197,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`)
+- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`)
 - :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`)
 - :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
 - The :ref:`integer dtype <integer_na>` with support for missing values and the
@@ -725,7 +726,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
 - Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`)
 - Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`)
 - Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`)
-- Removed :meth:`Series.nonzero`, use `to_numpy().nonzero()` instead (:issue:`24048`)
+- Removed :meth:`Series.nonzero`, use ``to_numpy().nonzero()`` instead (:issue:`24048`)
 - Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`)
 - Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`)
 - Removed :meth:`Series.put` (:issue:`27106`)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -10,6 +10,7 @@
 
 import numpy as np
 
+from pandas._libs import lib
 from pandas._typing import ArrayLike
 from pandas.compat import set_function_name
 from pandas.compat.numpy import function as nv
@@ -350,6 +351,39 @@ def __iter__(self):
         for i in range(len(self)):
             yield self[i]
 
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
+        """
+        Convert to a NumPy ndarray.
+
+        .. versionadded:: 1.0.0
+
+        This is similar to :func:`numpy.asarray`, but may provide additional control
+        over how the conversion is done.
+
+        Parameters
+        ----------
+        dtype : str or numpy.dtype, optional
+            The dtype to pass to :func:`numpy.asarray`.
+        copy : bool, default False
+            Whether to ensure that the returned value is not a view on
+            another array. Note that ``copy=False`` does not *ensure* that
+            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
+            a copy is made, even if not strictly necessary.
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+        Returns
+        -------
+        numpy.ndarray
+        """
+        result = np.asarray(self, dtype=dtype)
+        if copy or na_value is not lib._no_default:
+            result = result.copy()
+        if na_value is not lib._no_default:
+            result[self.isna()] = na_value
+        return result
+
     # ------------------------------------------------------------------------
     # Required attributes
     # ------------------------------------------------------------------------

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -327,11 +327,13 @@ def __getitem__(self, item):
 
         return type(self)(self._data[item], self._mask[item])
 
-    def to_numpy(self, dtype=None, copy=False, na_value: "Scalar" = libmissing.NA):
+    def to_numpy(
+        self, dtype=None, copy=False, na_value: "Scalar" = lib._no_default, **kwargs
+    ):
         """
-        Convert to a numpy array.
+        Convert to a NumPy array.
 
-        By default converts to a numpy object array. Specify the `dtype` and
+        By default converts to an object-dtype NumPy array. Specify the `dtype` and
         `na_value` keywords to customize the conversion.
 
         Parameters
@@ -342,18 +344,55 @@ def to_numpy(self, dtype=None, copy=False, na_value: "Scalar" = libmissing.NA):
             Whether to ensure that the returned value is a not a view on
             the array. Note that ``copy=False`` does not *ensure* that
             ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
-            a copy is made, even if not strictly necessary.
+            a copy is made, even if not strictly necessary. Avoiding a copy is
+            typically only possible when no missing values are present and `dtype`
+            is a boolean dtype.
         na_value : scalar, optional
              Scalar missing value indicator to use in numpy array. Defaults
              to the native missing value indicator of this array (pd.NA).
 
         Returns
         -------
-        np.ndarray
+        numpy.ndarray
+
+        Examples
+        --------
+        An object-dtype is the default result
+
+        >>> a = pd.array([True, False], dtype="boolean")
+        >>> a.to_numpy()
+        array([True, False], dtype=object)
+
+        When no missing values are present, a boolean dtype can be used.
+
+        >>> a.to_numpy(dtype="bool")
+        array([ True, False])
+
+        However, requesting a bool dtype will raise a ValueError if
+        missing values are present and the default missing value :attr:`NA`
+        is used.
+
+        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
+        >>> a
+        <BooleanArray>
+        [True, False, NA]
+        Length: 3, dtype: boolean
+
+        >>> a.to_numpy(dtype="bool")
+        Traceback (most recent call last):
+        ...
+        ValueError: cannot convert to bool numpy array in presence of missing values
+
+        Specify a valid `na_value` instead
+
+        >>> a.to_numpy(dtype="bool", na_value=False)
+        array([ True, False, False])
         """
+        if na_value is lib._no_default:
+            na_value = libmissing.NA
         if dtype is None:
             dtype = object
-        if self.isna().any():
+        if self._hasna:
             if is_bool_dtype(dtype) and na_value is libmissing.NA:
                 raise ValueError(
                     "cannot convert to bool numpy array in presence of missing values"
@@ -550,7 +589,7 @@ def astype(self, dtype, copy=True):
             na_value = np.nan
         # coerce
         data = self.to_numpy(na_value=na_value)
-        return astype_nansafe(data, dtype, copy=None)
+        return astype_nansafe(data, dtype, copy=False)
 
     def value_counts(self, dropna=True):
         """

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -421,7 +421,7 @@ def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
 
     # ------------------------------------------------------------------------
     # Additional Methods
-    def to_numpy(self, dtype=None, copy=False):
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
         """
         Convert the PandasArray to a :class:`numpy.ndarray`.
 
@@ -433,15 +433,21 @@ def to_numpy(self, dtype=None, copy=False):
             The NumPy dtype to pass to :func:`numpy.asarray`.
         copy : bool, default False
             Whether to copy the underlying data.
+        na_value : Scalar, optional
+            The value to use for missing values.
 
         Returns
         -------
         ndarray
         """
         result = np.asarray(self._ndarray, dtype=dtype)
-        if copy and result is self._ndarray:
+
+        if (copy or na_value is not lib._no_default) and result is self._ndarray:
             result = result.copy()
 
+        if na_value is not lib._no_default:
+            result[self.isna()] = na_value
+
         return result
 
     @Appender(ExtensionArray.searchsorted.__doc__)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -19,7 +19,6 @@
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_datetime64_ns_dtype,
-    is_datetime64tz_dtype,
     is_dict_like,
     is_extension_array_dtype,
     is_list_like,
@@ -785,7 +784,7 @@ def array(self) -> ExtensionArray:
 
         return result
 
-    def to_numpy(self, dtype=None, copy=False, **kwargs):
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default, **kwargs):
         """
         A NumPy ndarray representing the values in this Series or Index.
 
@@ -800,6 +799,12 @@ def to_numpy(self, dtype=None, copy=False, **kwargs):
             another array. Note that ``copy=False`` does not *ensure* that
             ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
             a copy is made, even if not strictly necessary.
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+            .. versionadded:: 1.0.0
+
         **kwargs
             Additional keywords passed through to the ``to_numpy`` method
             of the underlying array (for extension arrays).
@@ -874,24 +879,21 @@ def to_numpy(self, dtype=None, copy=False, **kwargs):
         array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
               dtype='datetime64[ns]')
         """
-        if is_extension_array_dtype(self.dtype) and hasattr(self.array, "to_numpy"):
-            return self.array.to_numpy(dtype, copy=copy, **kwargs)
+        if is_extension_array_dtype(self.dtype):
+            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
         else:
             if kwargs:
                 msg = "to_numpy() got an unexpected keyword argument '{}'".format(
                     list(kwargs.keys())[0]
                 )
                 raise TypeError(msg)
-        if is_datetime64tz_dtype(self.dtype) and dtype is None:
-            # note: this is going to change very soon.
-            # I have a WIP PR making this unnecessary, but it's
-            # a bit out of scope for the DatetimeArray PR.
-            dtype = "object"
 
         result = np.asarray(self._values, dtype=dtype)
         # TODO(GH-24345): Avoid potential double copy
-        if copy:
+        if copy or na_value is not lib._no_default:
             result = result.copy()
+            if na_value is not lib._no_default:
+                result[self.isna()] = na_value
         return result
 
     @property

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1242,7 +1242,7 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None):
 
         return cls(data, index=index, columns=columns, dtype=dtype)
 
-    def to_numpy(self, dtype=None, copy=False):
+    def to_numpy(self, dtype=None, copy=False, na_value=lib._no_default):
         """
         Convert the DataFrame to a NumPy array.
 
@@ -1264,6 +1264,12 @@ def to_numpy(self, dtype=None, copy=False):
             ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
             a copy is made, even if not strictly necessary.
 
+        na_value : Any, optional
+            The value to use for missing values. The default value depends
+            on `dtype` and the type of the array.
+
+            .. versionadded:: 1.0.0
+
         Returns
         -------
         numpy.ndarray
@@ -1295,6 +1301,13 @@ def to_numpy(self, dtype=None, copy=False):
                [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)
         """
         result = np.array(self.values, dtype=dtype, copy=copy)
+        if na_value is not lib._no_default:
+            if not copy:
+                # copy even if not requested. This may be unnecessary
+                # if NumPy already copied.
+                result = result.copy()
+
+            result[self.isna()] = na_value
         return result
 
     def to_dict(self, orient="dict", into=dict):

diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
@@ -4,6 +4,7 @@
 import pytest
 
 from pandas._libs import OutOfBoundsDatetime
+from pandas.compat.numpy import _np_version_under1p18
 
 import pandas as pd
 from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
@@ -758,3 +759,38 @@ def test_invalid_nat_setitem_array(array, non_casting_nats):
     for nat in non_casting_nats:
         with pytest.raises(TypeError):
             array[0] = nat
+
+
+@pytest.mark.parametrize(
+    "array",
+    [
+        pd.date_range("2000", periods=4).array,
+        pd.timedelta_range("2000", periods=4).array,
+    ],
+)
+def test_to_numpy_extra(array):
+    if _np_version_under1p18:
+        # np.isnan(NaT) raises on NumPy < 1.18, so fall back to pd.isna
+        isnan = pd.isna
+    else:
+        isnan = np.isnan
+
+    array[0] = pd.NaT
+    original = array.copy()
+
+    result = array.to_numpy()
+    assert isnan(result[0])
+
+    result = array.to_numpy(dtype="int64")
+    assert result[0] == -9223372036854775808
+
+    result = array.to_numpy(dtype="int64", na_value=0)
+    assert result[0] == 0
+
+    result = array.to_numpy(na_value=array[1].to_numpy())
+    assert result[0] == result[1]
+
+    result = array.to_numpy(na_value=array[1].to_numpy(copy=False))
+    assert result[0] == result[1]
+
+    tm.assert_equal(array, original)
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
@@ -401,3 +401,36 @@ def test_to_numpy_dtype(as_series):
     result = obj.to_numpy(dtype="M8[ns]")
     expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]")
     tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "values, dtype, na_value, expected",
+    [
+        ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]),
+        (
+            [pd.Timestamp("2000"), pd.Timestamp("2000"), pd.NaT],
+            None,
+            pd.Timestamp("2000"),
+            [np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
+        ),
+    ],
+)
+@pytest.mark.parametrize("container", [pd.Series, pd.Index])  # type: ignore
+def test_to_numpy_na_value_numpy_dtype(container, values, dtype, na_value, expected):
+    s = container(values)
+    result = s.to_numpy(dtype=dtype, na_value=na_value)
+    expected = np.array(expected)
+    tm.assert_numpy_array_equal(result, expected)
+
+
+def test_to_numpy_kwargs_raises():
+    # numpy
+    s = pd.Series([1, 2, 3])
+    match = r"to_numpy\(\) got an unexpected keyword argument 'foo'"
+    with pytest.raises(TypeError, match=match):
+        s.to_numpy(foo=True)
+
+    # extension
+    s = pd.Series([1, 2, 3], dtype="Int64")
+    with pytest.raises(TypeError, match=match):
+        s.to_numpy(foo=True)
diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 import pandas as pd
 from pandas.core.internals import ObjectBlock
 
@@ -21,3 +23,12 @@ def test_astype_str(self, data):
         result = pd.Series(data[:5]).astype(str)
         expected = pd.Series(data[:5].astype(str))
         self.assert_series_equal(result, expected)
+
+    def test_to_numpy(self, data):
+        expected = np.asarray(data)
+
+        result = data.to_numpy()
+        self.assert_equal(result, expected)
+
+        result = pd.Series(data).to_numpy()
+        self.assert_equal(result, expected)