Skip to content

Commit f284a0f

Browse files
jbrockmendel, JulianWgs
authored and committed
REF: simplify try_cast (pandas-dev#41811)
1 parent cd1a22e commit f284a0f

File tree

3 files changed

+10
-66
lines changed

3 files changed

+10
-66
lines changed

pandas/core/construction.py

+6 -7
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232
)
3333
from pandas.core.dtypes.cast import (
3434
construct_1d_arraylike_from_scalar,
35-
construct_1d_ndarray_preserving_na,
3635
construct_1d_object_array_from_listlike,
3736
maybe_cast_to_datetime,
3837
maybe_cast_to_integer_array,
@@ -726,6 +725,10 @@ def _try_cast(
726725
return subarr
727726
return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)
728727

728+
elif dtype.kind == "U":
729+
# TODO: test cases with arr.dtype.kind in ["m", "M"]
730+
return lib.ensure_string_array(arr, convert_na_value=False, copy=copy)
731+
729732
elif dtype.kind in ["m", "M"]:
730733
return maybe_cast_to_datetime(arr, dtype)
731734

@@ -735,16 +738,12 @@ def _try_cast(
735738
if is_integer_dtype(dtype):
736739
# this will raise if we have e.g. floats
737740

738-
maybe_cast_to_integer_array(arr, dtype)
739-
subarr = arr
741+
subarr = maybe_cast_to_integer_array(arr, dtype)
740742
else:
741-
subarr = arr
742-
743-
if not isinstance(subarr, ABCExtensionArray):
744743
# 4 tests fail if we move this to a try/except/else; see
745744
# test_constructor_compound_dtypes, test_constructor_cast_failure
746745
# test_constructor_dict_cast2, test_loc_setitem_dtype
747-
subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy)
746+
subarr = np.array(arr, dtype=dtype, copy=copy)
748747

749748
except (ValueError, TypeError):
750749
if raise_cast_failure:

pandas/core/dtypes/cast.py

-54
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
from typing import (
1515
TYPE_CHECKING,
1616
Any,
17-
Sequence,
1817
Sized,
1918
cast,
2019
overload,
@@ -1971,59 +1970,6 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
19711970
return result
19721971

19731972

1974-
def construct_1d_ndarray_preserving_na(
1975-
values: Sequence, dtype: np.dtype | None = None, copy: bool = False
1976-
) -> np.ndarray:
1977-
"""
1978-
Construct a new ndarray, coercing `values` to `dtype`, preserving NA.
1979-
1980-
Parameters
1981-
----------
1982-
values : Sequence
1983-
dtype : numpy.dtype, optional
1984-
copy : bool, default False
1985-
Note that copies may still be made with ``copy=False`` if casting
1986-
is required.
1987-
1988-
Returns
1989-
-------
1990-
arr : ndarray[dtype]
1991-
1992-
Examples
1993-
--------
1994-
>>> np.array([1.0, 2.0, None], dtype='str')
1995-
array(['1.0', '2.0', 'None'], dtype='<U4')
1996-
1997-
>>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str'))
1998-
array(['1.0', '2.0', None], dtype=object)
1999-
"""
2000-
2001-
if dtype is not None and dtype.kind == "U":
2002-
subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
2003-
else:
2004-
if dtype is not None:
2005-
_disallow_mismatched_datetimelike(values, dtype)
2006-
2007-
if (
2008-
dtype == object
2009-
and isinstance(values, np.ndarray)
2010-
and values.dtype.kind in ["m", "M"]
2011-
):
2012-
# TODO(numpy#12550): special-case can be removed
2013-
subarr = construct_1d_object_array_from_listlike(list(values))
2014-
elif (
2015-
dtype is not None
2016-
and dtype.kind in ["i", "u"]
2017-
and isinstance(values, np.ndarray)
2018-
and values.dtype.kind == "f"
2019-
):
2020-
return astype_float_to_int_nansafe(values, dtype, copy=copy)
2021-
else:
2022-
subarr = np.array(values, dtype=dtype, copy=copy)
2023-
2024-
return subarr
2025-
2026-
20271973
def maybe_cast_to_integer_array(
20281974
arr: list | np.ndarray, dtype: np.dtype, copy: bool = False
20291975
) -> np.ndarray:
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,22 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.core.dtypes.cast import construct_1d_ndarray_preserving_na
5-
64
import pandas._testing as tm
5+
from pandas.core.construction import sanitize_array
76

87

98
@pytest.mark.parametrize(
109
"values, dtype, expected",
1110
[
12-
([1, 2, 3], None, np.array([1, 2, 3])),
11+
([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)),
1312
(np.array([1, 2, 3]), None, np.array([1, 2, 3])),
1413
(["1", "2", None], None, np.array(["1", "2", None])),
1514
(["1", "2", None], np.dtype("str"), np.array(["1", "2", None])),
1615
([1, 2, None], np.dtype("str"), np.array(["1", "2", None])),
1716
],
1817
)
1918
def test_construct_1d_ndarray_preserving_na(values, dtype, expected):
20-
result = construct_1d_ndarray_preserving_na(values, dtype=dtype)
19+
result = sanitize_array(values, index=None, dtype=dtype)
2120
tm.assert_numpy_array_equal(result, expected)
2221

2322

@@ -27,5 +26,5 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype):
2726
expected = np.array(list(arr), dtype=object)
2827
assert all(isinstance(x, type(arr[0])) for x in expected)
2928

30-
result = construct_1d_ndarray_preserving_na(arr, np.dtype(object))
29+
result = sanitize_array(arr, index=None, dtype=np.dtype(object))
3130
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)