Skip to content

Commit a3b702f

Browse files
author
MarcoGorelli
committed
to_numpy defaults to dtype.type
1 parent 21ad93b commit a3b702f

File tree

9 files changed

+111
-40
lines changed

9 files changed

+111
-40
lines changed

doc/source/whatsnew/v1.6.0.rst

+34
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,40 @@ Optional libraries below the lowest tested version may still work, but are not c
111111

112112
See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
113113

114+
Nullable types get converted to their respective NumPy types in ``to_numpy``
115+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
116+
117+
Previously, calling ``to_numpy`` on a nullable type without passing a ``dtype`` would always return an ``object``-dtype array:
118+
119+
*Old Behavior*
120+
121+
.. code-block:: ipython
122+
123+
In [1]: pd.Series([1, 2, 3], dtype="Float64").to_numpy()
124+
Out[1]: array([1.0, 2.0, 3.0], dtype=object)
125+
126+
Now, the above ``Series`` gets converted to ``float64``:
127+
128+
*New Behavior*
129+
130+
.. ipython:: python
131+
132+
pd.Series([1, 2, 3], dtype="Float64").to_numpy()
133+
134+
If a ``Series`` contains missing values (``pd.NA``), then when converting to ``float64``,
135+
they will be converted to ``np.nan``:
136+
137+
.. ipython:: python
138+
139+
pd.Series([1, 2, pd.NA], dtype="Float64").to_numpy()
140+
141+
If converting to a type that cannot hold missing values (for example ``int64`` or ``bool``), then you need to specify an ``na_value``
142+
compatible with that ``dtype``, for example:
143+
144+
.. ipython:: python
145+
146+
pd.Series([1, 2, pd.NA], dtype="Float64").to_numpy("int64", na_value=-1)
147+
114148
.. _whatsnew_160.api_breaking.other:
115149

116150
Other API changes

pandas/_testing/asserters.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1051,15 +1051,15 @@ def assert_series_equal(
10511051
left_values,
10521052
right_values,
10531053
check_dtype=check_dtype,
1054-
index_values=np.asarray(left.index),
1054+
index_values=np.asarray(left.index, dtype=object),
10551055
)
10561056
else:
10571057
assert_numpy_array_equal(
10581058
left_values,
10591059
right_values,
10601060
check_dtype=check_dtype,
10611061
obj=str(obj),
1062-
index_values=np.asarray(left.index),
1062+
index_values=np.asarray(left.index, dtype=object),
10631063
)
10641064
elif check_datetimelike_compat and (
10651065
needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
@@ -1088,7 +1088,7 @@ def assert_series_equal(
10881088
atol=atol,
10891089
check_dtype=bool(check_dtype),
10901090
obj=str(obj),
1091-
index_values=np.asarray(left.index),
1091+
index_values=np.asarray(left.index, dtype=object),
10921092
)
10931093
elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype):
10941094
assert_extension_array_equal(
@@ -1097,7 +1097,7 @@ def assert_series_equal(
10971097
rtol=rtol,
10981098
atol=atol,
10991099
check_dtype=check_dtype,
1100-
index_values=np.asarray(left.index),
1100+
index_values=np.asarray(left.index, dtype=object),
11011101
)
11021102
elif is_extension_array_dtype_and_needs_i8_conversion(
11031103
left.dtype, right.dtype
@@ -1106,15 +1106,15 @@ def assert_series_equal(
11061106
left._values,
11071107
right._values,
11081108
check_dtype=check_dtype,
1109-
index_values=np.asarray(left.index),
1109+
index_values=np.asarray(left.index, dtype=object),
11101110
)
11111111
elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
11121112
# DatetimeArray or TimedeltaArray
11131113
assert_extension_array_equal(
11141114
left._values,
11151115
right._values,
11161116
check_dtype=check_dtype,
1117-
index_values=np.asarray(left.index),
1117+
index_values=np.asarray(left.index, dtype=object),
11181118
)
11191119
else:
11201120
_testing.assert_almost_equal(
@@ -1124,7 +1124,7 @@ def assert_series_equal(
11241124
atol=atol,
11251125
check_dtype=bool(check_dtype),
11261126
obj=str(obj),
1127-
index_values=np.asarray(left.index),
1127+
index_values=np.asarray(left.index, dtype=object),
11281128
)
11291129

11301130
# metadata comparison

pandas/core/arrays/masked.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,14 @@ def to_numpy(
401401
>>> a.to_numpy(dtype="bool", na_value=False)
402402
array([ True, False, False])
403403
"""
404-
if na_value is lib.no_default:
405-
na_value = libmissing.NA
406404
if dtype is None:
407-
dtype = object
405+
dtype = self.dtype.type
406+
407+
if na_value is lib.no_default and is_float_dtype(dtype):
408+
na_value = np.nan
409+
elif na_value is lib.no_default:
410+
na_value = libmissing.NA
411+
408412
if self._hasna:
409413
if (
410414
not is_object_dtype(dtype)
@@ -413,8 +417,12 @@ def to_numpy(
413417
):
414418
raise ValueError(
415419
f"cannot convert to '{dtype}'-dtype NumPy array "
416-
"with missing values. Specify an appropriate 'na_value' "
417-
"for this dtype."
420+
"with missing values.\n"
421+
"Please either:\n"
422+
"- convert to 'float'\n"
423+
"- convert to 'object'\n"
424+
"- specify an appropriate 'na_value' for this dtype\n"
425+
"for this dtype.\n"
418426
)
419427
# don't pass copy to astype -> always need a copy since we are mutating
420428
data = self._data.astype(dtype)

pandas/io/formats/format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1657,7 +1657,7 @@ def _format_strings(self) -> list[str]:
16571657
# Categorical is special for now, so that we can preserve tzinfo
16581658
array = values._internal_get_values()
16591659
else:
1660-
array = np.asarray(values)
1660+
array = np.asarray(values, dtype=object)
16611661

16621662
fmt_values = format_array(
16631663
array,

pandas/tests/arrays/boolean/test_construction.py

+24-16
Original file line numberDiff line numberDiff line change
@@ -214,16 +214,17 @@ def test_coerce_to_array_from_boolean_array():
214214

215215

216216
def test_coerce_to_numpy_array():
217-
# with missing values -> object dtype
217+
# with missing values -> tries but fails to convert
218218
arr = pd.array([True, False, None], dtype="boolean")
219-
result = np.array(arr)
220-
expected = np.array([True, False, pd.NA], dtype="object")
221-
tm.assert_numpy_array_equal(result, expected)
219+
with pytest.raises(
220+
ValueError, match=r"specify an appropriate 'na_value' for this dtype"
221+
):
222+
result = np.array(arr)
222223

223-
# also with no missing values -> object dtype
224+
# also with no missing values -> successfully converts to bool
224225
arr = pd.array([True, False, True], dtype="boolean")
225226
result = np.array(arr)
226-
expected = np.array([True, False, True], dtype="object")
227+
expected = np.array([True, False, True], dtype="bool")
227228
tm.assert_numpy_array_equal(result, expected)
228229

229230
# force bool dtype
@@ -233,8 +234,12 @@ def test_coerce_to_numpy_array():
233234
# with missing values will raise error
234235
arr = pd.array([True, False, None], dtype="boolean")
235236
msg = (
236-
"cannot convert to 'bool'-dtype NumPy array with missing values. "
237-
"Specify an appropriate 'na_value' for this dtype."
237+
"^cannot convert to 'bool'-dtype NumPy array with missing values.\n"
238+
"Please either:\n"
239+
"- convert to 'float'\n"
240+
"- convert to 'object'\n"
241+
"- specify an appropriate 'na_value' for this dtype\n"
242+
"for this dtype.\n$"
238243
)
239244
with pytest.raises(ValueError, match=msg):
240245
np.array(arr, dtype="bool")
@@ -260,16 +265,17 @@ def test_to_boolean_array_from_strings_invalid_string():
260265
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
261266
def test_to_numpy(box):
262267
con = pd.Series if box else pd.array
263-
# default (with or without missing values) -> object dtype
268+
# default: without missing values -> bool dtype; with missing values -> raises
264269
arr = con([True, False, True], dtype="boolean")
265270
result = arr.to_numpy()
266-
expected = np.array([True, False, True], dtype="object")
271+
expected = np.array([True, False, True], dtype="bool")
267272
tm.assert_numpy_array_equal(result, expected)
268273

269274
arr = con([True, False, None], dtype="boolean")
270-
result = arr.to_numpy()
271-
expected = np.array([True, False, pd.NA], dtype="object")
272-
tm.assert_numpy_array_equal(result, expected)
275+
with pytest.raises(
276+
ValueError, match="specify an appropriate 'na_value' for this dtype"
277+
):
278+
arr.to_numpy()
273279

274280
arr = con([True, False, None], dtype="boolean")
275281
result = arr.to_numpy(dtype="str")
@@ -304,11 +310,13 @@ def test_to_numpy(box):
304310
expected = np.array([1, 0, np.nan], dtype="float64")
305311
tm.assert_numpy_array_equal(result, expected)
306312

307-
# converting to int or float without specifying na_value raises
313+
# converting to int without specifying na_value raises
308314
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
309315
arr.to_numpy(dtype="int64")
310-
with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
311-
arr.to_numpy(dtype="float64")
316+
# converting to float without specifying na_value converts NA to nan
317+
result = arr.to_numpy(dtype="float64")
318+
expected = np.array([1, 0, np.nan], dtype="float64")
319+
tm.assert_numpy_array_equal(result, expected)
312320

313321

314322
def test_to_numpy_copy():

pandas/tests/arrays/floating/test_to_numpy.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,15 @@
1010
def test_to_numpy(box):
1111
con = pd.Series if box else pd.array
1212

13-
# default (with or without missing values) -> object dtype
13+
# default (with or without missing values) -> float64 dtype
1414
arr = con([0.1, 0.2, 0.3], dtype="Float64")
1515
result = arr.to_numpy()
16-
expected = np.array([0.1, 0.2, 0.3], dtype="object")
16+
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
1717
tm.assert_numpy_array_equal(result, expected)
1818

1919
arr = con([0.1, 0.2, None], dtype="Float64")
2020
result = arr.to_numpy()
21-
expected = np.array([0.1, 0.2, pd.NA], dtype="object")
21+
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
2222
tm.assert_numpy_array_equal(result, expected)
2323

2424

@@ -33,8 +33,9 @@ def test_to_numpy_float(box):
3333
tm.assert_numpy_array_equal(result, expected)
3434

3535
arr = con([0.1, 0.2, None], dtype="Float64")
36-
with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
37-
result = arr.to_numpy(dtype="float64")
36+
result = arr.to_numpy(dtype="float64")
37+
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
38+
tm.assert_numpy_array_equal(result, expected)
3839

3940
# need to explicitly specify na_value
4041
result = arr.to_numpy(dtype="float64", na_value=np.nan)
@@ -100,7 +101,18 @@ def test_to_numpy_dtype(box, dtype):
100101
tm.assert_numpy_array_equal(result, expected)
101102

102103

103-
@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
104+
@pytest.mark.parametrize("dtype", ["float64", "float32"])
105+
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
106+
def test_to_numpy_na_doesnt_raise(box, dtype):
107+
# https://github.com/pandas-dev/pandas/issues/48891
108+
con = pd.Series if box else pd.array
109+
arr = con([0.0, 1.0, None], dtype="Float64")
110+
result = arr.to_numpy(dtype=dtype)
111+
expected = np.array([0.0, 1.0, np.nan], dtype=dtype)
112+
tm.assert_numpy_array_equal(result, expected)
113+
114+
115+
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
104116
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
105117
def test_to_numpy_na_raises(box, dtype):
106118
con = pd.Series if box else pd.array

pandas/tests/arrays/integer/test_construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def test_from_dtype_from_float(data):
3737

3838
# from int / list
3939
expected = pd.Series(data)
40-
result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
40+
result = pd.Series(np.array(data, dtype=object).tolist(), dtype=str(dtype))
4141
tm.assert_series_equal(result, expected)
4242

4343
# from int / array

pandas/tests/arrays/integer/test_dtypes.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def test_astype_index(all_data, dropna):
8989
other = all_data
9090

9191
dtype = all_data.dtype
92-
idx = pd.Index._with_infer(np.array(other))
92+
idx = pd.Index._with_infer(np.array(other, dtype=object))
9393
assert isinstance(idx, ABCIndex)
9494

9595
result = idx.astype(dtype)
@@ -143,7 +143,7 @@ def test_astype(all_data):
143143
# coerce to object
144144
s = pd.Series(mixed)
145145
result = s.astype("object")
146-
expected = pd.Series(np.asarray(mixed))
146+
expected = pd.Series(np.asarray(mixed, dtype=object))
147147
tm.assert_series_equal(result, expected)
148148

149149

@@ -274,13 +274,22 @@ def test_to_numpy_dtype(dtype, in_series):
274274
tm.assert_numpy_array_equal(result, expected)
275275

276276

277-
@pytest.mark.parametrize("dtype", ["float64", "int64", "bool"])
277+
@pytest.mark.parametrize("dtype", ["int64", "bool"])
278278
def test_to_numpy_na_raises(dtype):
279279
a = pd.array([0, 1, None], dtype="Int64")
280280
with pytest.raises(ValueError, match=dtype):
281281
a.to_numpy(dtype=dtype)
282282

283283

284+
@pytest.mark.parametrize("dtype", ["float64"])
285+
def test_to_numpy_na_doesnt_raise(dtype):
286+
# https://github.com/pandas-dev/pandas/issues/48891
287+
a = pd.array([0, 1, None], dtype="Int64")
288+
result = a.to_numpy(dtype=dtype)
289+
expected = np.array([0.0, 1.0, np.nan])
290+
tm.assert_numpy_array_equal(result, expected)
291+
292+
284293
def test_astype_str():
285294
a = pd.array([1, 2, None], dtype="Int64")
286295
expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")

pandas/tests/arrays/masked_shared.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def test_ufunc_with_out(self, dtype):
130130
# result |= mask worked because mask could be cast losslessly to
131131
# boolean ndarray. mask2 can't, so this raises
132132
result = np.zeros(3, dtype=bool)
133-
msg = "Specify an appropriate 'na_value' for this dtype"
133+
msg = "specify an appropriate 'na_value' for this dtype"
134134
with pytest.raises(ValueError, match=msg):
135135
result |= mask2
136136

0 commit comments

Comments
 (0)