Skip to content

Commit 813918a

Browse files
committed
API: Standard signature for to_numpy
This is part 1 of pandas-dev#23995. We standardize the signature as `to_numpy(dtype: Union[str, np.dtype], copy: bool) -> ndarray`.
1 parent 1bfeb90 commit 813918a

File tree

7 files changed

+117
-19
lines changed

7 files changed

+117
-19
lines changed

pandas/core/base.py

+29-12
Original file line numberDiff line numberDiff line change
@@ -841,18 +841,22 @@ def array(self):
841841
"""
842842
return self._values
843843

844-
def to_numpy(self):
844+
def to_numpy(self, dtype=None, copy=False):
845845
"""
846846
A NumPy ndarray representing the values in this Series or Index.
847847
848848
.. versionadded:: 0.24.0
849849
850-
The returned array will be the same up to equality (values equal
851-
in `self` will be equal in the returned array; likewise for values
852-
that are not equal). When `self` contains an ExtensionArray, the
853-
dtype may be different. For example, for a category-dtype Series,
854-
``to_numpy()`` will return a NumPy array and the categorical dtype
855-
will be lost.
850+
851+
Parameters
852+
----------
853+
dtype : str or numpy.dtype, optional
854+
The dtype to pass to :meth:`numpy.asarray`
855+
copy : bool, default False
856+
Whether to ensure that the returned value is not a view on
857+
another array. Note that ``copy=False`` does not *ensure* that
858+
``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
859+
a copy is made, even if not strictly necessary.
856860
857861
Returns
858862
-------
@@ -866,10 +870,18 @@ def to_numpy(self):
866870
867871
Notes
868872
-----
873+
The returned array will be the same up to equality (values equal
874+
in `self` will be equal in the returned array; likewise for values
875+
that are not equal). When `self` contains an ExtensionArray, the
876+
dtype may be different. For example, for a category-dtype Series,
877+
``to_numpy()`` will return a NumPy array and the categorical dtype
878+
will be lost.
879+
880+
869881
For NumPy dtypes, this will be a reference to the actual data stored
870-
in this Series or Index. Modifying the result in place will modify
871-
the data stored in the Series or Index (not that we recommend doing
872-
that).
882+
in this Series or Index (assuming ``copy=False``). Modifying the result
883+
in place will modify the data stored in the Series or Index (not that
884+
we recommend doing that).
873885
874886
For extension types, ``to_numpy()`` *may* require copying data and
875887
coercing the result to a NumPy type (possibly object), which may be
@@ -898,8 +910,13 @@ def to_numpy(self):
898910
if (is_extension_array_dtype(self.dtype) or
899911
is_datetime64tz_dtype(self.dtype)):
900912
# TODO(DatetimeArray): remove the second clause.
901-
return np.asarray(self._values)
902-
return self._values
913+
result = np.asarray(self._values, dtype=dtype)
914+
else:
915+
result = self._values
916+
917+
if copy:
918+
result = result.copy()
919+
return result
903920

904921
@property
905922
def _ndarray_values(self):

pandas/core/frame.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -1072,17 +1072,27 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None):
10721072

10731073
return cls(data, index=index, columns=columns, dtype=dtype)
10741074

1075-
def to_numpy(self):
1075+
def to_numpy(self, dtype=None, copy=False):
10761076
"""
10771077
Convert the DataFrame to a NumPy array.
10781078
10791079
.. versionadded:: 0.24.0
10801080
1081-
The dtype of the returned array will be the common NumPy
1082-
dtype of all types in the DataFrame. For example,
1083-
if the dtypes are ``float16`` and ``float32``, the results
1084-
dtype will be ``float32``. This may require copying data and
1085-
coercing values, which may be expensive.
1081+
By default, the dtype of the returned array will be the common NumPy
1082+
dtype of all types in the DataFrame. For example, if the dtypes are
1083+
``float16`` and ``float32``, the result's dtype will be ``float32``.
1084+
This may require copying data and coercing values, which may be
1085+
expensive.
1086+
1087+
Parameters
1088+
----------
1089+
dtype : str or numpy.dtype, optional
1090+
The dtype to pass to :meth:`numpy.asarray`
1091+
copy : bool, default False
1092+
Whether to ensure that the returned value is not a view on
1093+
another array. Note that ``copy=False`` does not *ensure* that
1094+
``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
1095+
a copy is made, even if not strictly necessary.
10861096
10871097
Returns
10881098
-------
@@ -1114,7 +1124,8 @@ def to_numpy(self):
11141124
array([[1, 3.0, Timestamp('2000-01-01 00:00:00')],
11151125
[2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)
11161126
"""
1117-
return self.values
1127+
result = np.array(self.values, dtype=dtype, copy=copy)
1128+
return result
11181129

11191130
def to_dict(self, orient='dict', into=dict):
11201131
"""

pandas/tests/frame/test_api.py

+13
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,19 @@ def test_to_numpy(self):
325325
result = df.to_numpy()
326326
tm.assert_numpy_array_equal(result, expected)
327327

328+
def test_to_numpy_dtype(self):
329+
df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
330+
expected = np.array([[1, 3], [2, 4]], dtype="int64")
331+
result = df.to_numpy(dtype="int64")
332+
tm.assert_numpy_array_equal(result, expected)
333+
334+
def test_to_numpy_copy(self):
335+
arr = np.random.randn(4, 3)
336+
df = pd.DataFrame(arr)
337+
assert df.values.base is arr
338+
assert df.to_numpy(copy=False).base is arr
339+
assert df.to_numpy(copy=True).base is None
340+
328341
def test_transpose(self, float_frame):
329342
frame = float_frame
330343
dft = frame.T

pandas/tests/indexes/multi/test_conversion.py

+6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ def test_tolist(idx):
1717
assert result == exp
1818

1919

20+
def test_to_numpy(idx):
21+
result = idx.to_numpy()
22+
exp = idx.values
23+
tm.assert_numpy_array_equal(result, exp)
24+
25+
2026
def test_to_frame():
2127
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
2228

pandas/tests/indexes/period/test_period.py

+7
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ def test_dtype_str(self):
120120
assert pi.dtype_str == 'period[3M]'
121121
assert pi.dtype_str == str(pi.dtype)
122122

123+
def test_to_numpy(self):
124+
arr = period_range('2000', periods=2)
125+
result = arr.to_numpy()
126+
expected = np.array([pd.Period('2000-01-01', 'D'),
127+
pd.Period('2000-01-01', 'D')])
128+
tm.assert_numpy_array_equal(result, expected)
129+
123130
def test_view_asi8(self):
124131
idx = pd.PeriodIndex([], freq='M')
125132

pandas/tests/indexes/test_common.py

+4
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,7 @@ def test_has_duplicates(self, indices):
341341
idx = holder([indices[0]] * 5)
342342
assert idx.is_unique is False
343343
assert idx.has_duplicates is True
344+
345+
def test_to_numpy(self, indices):
346+
result = indices.to_numpy()
347+
assert result is None

pandas/tests/test_base.py

+40
Original file line numberDiff line numberDiff line change
@@ -1301,3 +1301,43 @@ def test_to_numpy(array, expected, box):
13011301

13021302
result = thing.to_numpy()
13031303
tm.assert_numpy_array_equal(result, expected)
1304+
1305+
1306+
@pytest.mark.parametrize("as_series", [True, False])
1307+
@pytest.mark.parametrize("arr", [
1308+
np.array([1, 2, 3], dtype="int64"),
1309+
np.array(['a', 'b', 'c'], dtype=object),
1310+
])
1311+
def test_to_numpy_copy(arr, as_series):
1312+
obj = pd.Index(arr, copy=False)
1313+
if as_series:
1314+
obj = pd.Series(obj.values, copy=False)
1315+
1316+
# no copy by default
1317+
result = obj.to_numpy()
1318+
assert np.shares_memory(arr, result) is True
1319+
1320+
result = obj.to_numpy(copy=False)
1321+
assert np.shares_memory(arr, result) is True
1322+
1323+
# copy=True
1324+
result = obj.to_numpy(copy=True)
1325+
assert np.shares_memory(arr, result) is False
1326+
1327+
1328+
# Exercise ``to_numpy`` on a tz-aware DatetimeIndex, and on a Series wrapping
# it when ``as_series`` is true, both with ``dtype=object`` and with no dtype.
@pytest.mark.parametrize("as_series", [True, False])
1329+
def test_to_numpy_dtype(as_series):
1330+
tz = "US/Eastern"
1331+
obj = pd.DatetimeIndex(['2000', '2001'], tz=tz)
1332+
if as_series:
1333+
obj = pd.Series(obj)
1334+
# dtype=object is expected to give an object array of tz-aware Timestamps.
result = obj.to_numpy(dtype=object)
1335+
expected = np.array([pd.Timestamp('2000', tz=tz),
1336+
pd.Timestamp('2001', tz=tz)],
1337+
dtype=object)
1338+
tm.assert_numpy_array_equal(result, expected)
1339+
1340+
# With no dtype the expected result is a naive M8[ns] array at 05:00,
# presumably the UTC equivalent of midnight US/Eastern (confirm).
result = obj.to_numpy()
1341+
expected = np.array(['2000-01-01T05', '2001-01-01T05'],
1342+
dtype='M8[ns]')
1343+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)