From 1182cee690a5a5f39ba46650c8243b3f3b603315 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Jun 2021 16:03:21 -0700 Subject: [PATCH 1/3] REF: simplify _try_cast --- pandas/core/construction.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index c877d27fd2392..8e4f13f1e0a19 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -32,7 +32,6 @@ ) from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, - construct_1d_ndarray_preserving_na, construct_1d_object_array_from_listlike, maybe_cast_to_datetime, maybe_cast_to_integer_array, @@ -735,6 +734,10 @@ def _try_cast( return subarr return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) + elif dtype.kind == "U": + # TODO: test cases with arr.dtype.kind in ["m", "M"] + return lib.ensure_string_array(arr, convert_na_value=False, copy=copy) + elif dtype.kind in ["m", "M"]: return maybe_cast_to_datetime(arr, dtype) @@ -744,16 +747,12 @@ def _try_cast( if is_integer_dtype(dtype): # this will raise if we have e.g. 
floats - maybe_cast_to_integer_array(arr, dtype) - subarr = arr + subarr = maybe_cast_to_integer_array(arr, dtype) else: - subarr = arr - - if not isinstance(subarr, ABCExtensionArray): # 4 tests fail if we move this to a try/except/else; see # test_constructor_compound_dtypes, test_constructor_cast_failure # test_constructor_dict_cast2, test_loc_setitem_dtype - subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) + subarr = np.array(arr, dtype=dtype, copy=copy) except (ValueError, TypeError): if raise_cast_failure: From fba570634959d37a7007e2da0c3cce5f2f261525 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Jun 2021 18:54:45 -0700 Subject: [PATCH 2/3] CLN: remove no-longer-used construct_1d_ndarray_preserving_na --- pandas/core/dtypes/cast.py | 54 ------------------- .../dtypes/cast/test_construct_ndarray.py | 7 ++- 2 files changed, 3 insertions(+), 58 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 177b1ccd166cb..3a8ce27464d26 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -14,7 +14,6 @@ from typing import ( TYPE_CHECKING, Any, - Sequence, Sized, cast, overload, @@ -1965,59 +1964,6 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: return result -def construct_1d_ndarray_preserving_na( - values: Sequence, dtype: np.dtype | None = None, copy: bool = False -) -> np.ndarray: - """ - Construct a new ndarray, coercing `values` to `dtype`, preserving NA. - - Parameters - ---------- - values : Sequence - dtype : numpy.dtype, optional - copy : bool, default False - Note that copies may still be made with ``copy=False`` if casting - is required. 
- - Returns - ------- - arr : ndarray[dtype] - - Examples - -------- - >>> np.array([1.0, 2.0, None], dtype='str') - array(['1.0', '2.0', 'None'], dtype='<U4') - - >>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str')) - array(['1.0', '2.0', None], dtype=object) - """ - - if dtype is not None and dtype.kind == "U": - subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy) - else: - if dtype is not None: - _disallow_mismatched_datetimelike(values, dtype) - - if ( - dtype == object - and isinstance(values, np.ndarray) - and values.dtype.kind in ["m", "M"] - ): - # TODO(numpy#12550): special-case can be removed - subarr = construct_1d_object_array_from_listlike(list(values)) - elif ( - dtype is not None - and dtype.kind in ["i", "u"] - and isinstance(values, np.ndarray) - and values.dtype.kind == "f" - ): - return astype_float_to_int_nansafe(values, dtype, copy=copy) - else: - subarr = np.array(values, dtype=dtype, copy=copy) - - return subarr - - def maybe_cast_to_integer_array( arr: list | np.ndarray, dtype: np.dtype, copy: bool = False ) -> np.ndarray: diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index 72da93a5c4de3..cbc021a6aebc6 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -1,9 +1,8 @@ import numpy as np import pytest -from pandas.core.dtypes.cast import construct_1d_ndarray_preserving_na - import pandas._testing as tm +from pandas.core.construction import sanitize_array @pytest.mark.parametrize( @@ -17,7 +16,7 @@ ], ) def test_construct_1d_ndarray_preserving_na(values, dtype, expected): - result = construct_1d_ndarray_preserving_na(values, dtype=dtype) + result = sanitize_array(values, index=None, dtype=dtype) tm.assert_numpy_array_equal(result, expected) @@ -27,5 +26,5 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): expected = np.array(list(arr), dtype=object) 
assert all(isinstance(x, type(arr[0])) for x in expected) - result = construct_1d_ndarray_preserving_na(arr, np.dtype(object)) + result = sanitize_array(arr, index=None, dtype=np.dtype(object)) tm.assert_numpy_array_equal(result, expected) From e35dd61abaae730b3bfedeaef31156a1d45f691c Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 4 Jun 2021 13:08:41 -0700 Subject: [PATCH 3/3] 32bit compat --- pandas/tests/dtypes/cast/test_construct_ndarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index cbc021a6aebc6..10085ddde5c8f 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -8,7 +8,7 @@ @pytest.mark.parametrize( "values, dtype, expected", [ - ([1, 2, 3], None, np.array([1, 2, 3])), + ([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)), (np.array([1, 2, 3]), None, np.array([1, 2, 3])), (["1", "2", None], None, np.array(["1", "2", None])), (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])),