From b37a103899aa43a8620b5b64ca8785a37dbaf069 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 4 Dec 2020 21:25:09 -0800 Subject: [PATCH 1/3] REF: remove unnecessary cast_scalar_to_array --- pandas/core/dtypes/cast.py | 31 +------------------- pandas/core/frame.py | 17 ++++------- pandas/tests/dtypes/cast/test_infer_dtype.py | 27 +---------------- 3 files changed, 8 insertions(+), 67 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 12974d56dacdc..c8ee5535864bb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -33,7 +33,7 @@ ints_to_pytimedelta, ) from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar, Shape +from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( @@ -1593,35 +1593,6 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: return np.find_common_type(types, []) -def cast_scalar_to_array( - shape: Shape, value: Scalar, dtype: Optional[DtypeObj] = None -) -> np.ndarray: - """ - Create np.ndarray of specified shape and dtype, filled with values. - - Parameters - ---------- - shape : tuple - value : scalar value - dtype : np.dtype, optional - dtype to coerce - - Returns - ------- - ndarray of shape, filled with value, of specified / inferred dtype - - """ - if dtype is None: - dtype, fill_value = infer_dtype_from_scalar(value) - else: - fill_value = value - - values = np.empty(shape, dtype=dtype) - values.fill(fill_value) - - return values - - def construct_1d_arraylike_from_scalar( value: Scalar, length: int, dtype: DtypeObj ) -> ArrayLike: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f710660d6ad8e..faa4802139ef2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -78,7 +78,6 @@ ) from pandas.core.dtypes.cast import ( - cast_scalar_to_array, coerce_to_dtypes, construct_1d_arraylike_from_scalar, find_common_type, @@ -616,9 +615,9 @@ def __init__( if arr.ndim != 0: raise ValueError("DataFrame constructor not properly called!") - values = cast_scalar_to_array( - (len(index), len(columns)), data, dtype=dtype - ) + arr = np.atleast_2d(arr) + shape = (len(index), len(columns)) + values = np.tile(arr, shape) mgr = init_ndarray( values, index, columns, dtype=values.dtype, copy=False @@ -3915,7 +3914,7 @@ def reindexer(value): else: # cast ignores pandas dtypes. so save the dtype first - infer_dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) + infer_dtype, fill_value = infer_dtype_from_scalar(value, pandas_dtype=True) # upcast if is_extension_array_dtype(infer_dtype): @@ -3923,12 +3922,8 @@ def reindexer(value): value, len(self.index), infer_dtype ) else: - # pandas\core\frame.py:3827: error: Argument 1 to - # "cast_scalar_to_array" has incompatible type "int"; expected - # "Tuple[Any, ...]" [arg-type] - value = cast_scalar_to_array( - len(self.index), value # type: ignore[arg-type] - ) + value = np.empty(self.shape[:1], dtype=infer_dtype) + value.fill(fill_value) value = maybe_cast_to_datetime(value, infer_dtype) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 157adacbdfdf7..65da8985843f9 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -3,11 +3,7 @@ import numpy as np import pytest -from pandas.core.dtypes.cast import ( - cast_scalar_to_array, - infer_dtype_from_array, - infer_dtype_from_scalar, -) +from pandas.core.dtypes.cast import infer_dtype_from_array, infer_dtype_from_scalar from pandas.core.dtypes.common import is_dtype_equal from pandas import ( @@ -19,7 +15,6 @@ Timestamp, date_range, ) -import pandas._testing as tm @pytest.fixture(params=[True, False]) @@ -176,23 +171,3 @@ def test_infer_dtype_from_scalar_errors(): def test_infer_dtype_from_array(arr, expected, pandas_dtype): dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype) assert is_dtype_equal(dtype, expected) - - -@pytest.mark.parametrize( - "obj,dtype", - [ - (1, np.int64), - (1.1, np.float64), - (Timestamp("2011-01-01"), "datetime64[ns]"), - (Timestamp("2011-01-01", tz="US/Eastern"), object), - (Period("2011-01-01", freq="D"), object), - ], -) -def test_cast_scalar_to_array(obj, dtype): - shape = (3, 2) - - exp = np.empty(shape, dtype=dtype) - exp.fill(obj) - - arr = cast_scalar_to_array(shape, obj, dtype=dtype) - tm.assert_numpy_array_equal(arr, exp) From 01d5679d92ab943e1ee9de3a3132f199e5422713 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 4 Dec 2020 21:48:38 -0800 Subject: [PATCH 2/3] simplify --- pandas/core/frame.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index faa4802139ef2..431272a5f6070 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3916,14 +3916,9 @@ def reindexer(value): # cast ignores pandas dtypes. so save the dtype first infer_dtype, fill_value = infer_dtype_from_scalar(value, pandas_dtype=True) - # upcast - if is_extension_array_dtype(infer_dtype): - value = construct_1d_arraylike_from_scalar( - value, len(self.index), infer_dtype - ) - else: - value = np.empty(self.shape[:1], dtype=infer_dtype) - value.fill(fill_value) + value = construct_1d_arraylike_from_scalar( + fill_value, len(self), infer_dtype + ) value = maybe_cast_to_datetime(value, infer_dtype) From 9ff959fc73ef7b5dbb09de65716e27530cf41518 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 5 Dec 2020 08:52:41 -0800 Subject: [PATCH 3/3] use np.full --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 431272a5f6070..e0b04cbb5ce11 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -615,9 +615,8 @@ def __init__( if arr.ndim != 0: raise ValueError("DataFrame constructor not properly called!") - arr = np.atleast_2d(arr) shape = (len(index), len(columns)) - values = np.tile(arr, shape) + values = np.full(shape, arr) mgr = init_ndarray( values, index, columns, dtype=values.dtype, copy=False