Skip to content

TYP: construction #33725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def __init__(
values = _convert_to_list_like(values)

# By convention, empty lists result in object dtype:
sanitize_dtype = "object" if len(values) == 0 else None
sanitize_dtype = np.dtype("O") if len(values) == 0 else None
null_mask = isna(values)
if null_mask.any():
values = [values[idx] for idx in np.where(~null_mask)[0]]
Expand Down
29 changes: 13 additions & 16 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
from pandas._typing import ArrayLike, Dtype
from pandas._typing import ArrayLike, Dtype, DtypeObj

from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
Expand All @@ -36,7 +36,6 @@
is_list_like,
is_object_dtype,
is_timedelta64_ns_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry
from pandas.core.dtypes.generic import (
Expand All @@ -52,13 +51,12 @@
if TYPE_CHECKING:
from pandas.core.series import Series # noqa: F401
from pandas.core.indexes.api import Index # noqa: F401
from pandas.core.arrays import ExtensionArray # noqa: F401


def array(
data: Sequence[object],
dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
copy: bool = True,
) -> ABCExtensionArray:
data: Sequence[object], dtype: Optional[Dtype] = None, copy: bool = True,
) -> "ExtensionArray":
"""
Create an array.

Expand Down Expand Up @@ -388,14 +386,16 @@ def extract_array(obj, extract_numpy: bool = False):


def sanitize_array(
data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
):
data,
index: Optional["Index"],
dtype: Optional[DtypeObj] = None,
copy: bool = False,
raise_cast_failure: bool = False,
) -> ArrayLike:
"""
Sanitize input data to an ndarray, copy if specified, coerce to the
dtype if specified.
Sanitize input data to an ndarray or ExtensionArray, copy if specified,
coerce to the dtype if specified.
"""
if dtype is not None:
dtype = pandas_dtype(dtype)

if isinstance(data, ma.MaskedArray):
mask = ma.getmaskarray(data)
Expand Down Expand Up @@ -508,10 +508,7 @@ def sanitize_array(


def _try_cast(
arr,
dtype: Optional[Union[np.dtype, "ExtensionDtype"]],
copy: bool,
raise_cast_failure: bool,
arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool,
):
"""
Convert input to numpy ndarray and optionally cast to a given dtype.
Expand Down
49 changes: 26 additions & 23 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type

import numpy as np

Expand All @@ -17,7 +17,7 @@
iNaT,
)
from pandas._libs.tslibs.timezones import tz_compare
from pandas._typing import Dtype, DtypeObj
from pandas._typing import ArrayLike, Dtype, DtypeObj
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -613,7 +613,7 @@ def _ensure_dtype_type(value, dtype):
return dtype.type(value)


def infer_dtype_from(val, pandas_dtype: bool = False):
def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
"""
Interpret the dtype from a scalar or array.

Expand All @@ -630,7 +630,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False):
return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)


def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
"""
Interpret the dtype from a scalar.

Expand All @@ -641,7 +641,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
If False, a scalar belonging to a pandas extension type is inferred as
object
"""
dtype = np.object_
dtype = np.dtype(object)

# a 1-element ndarray
if isinstance(val, np.ndarray):
Expand All @@ -660,7 +660,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
# instead of np.empty (but then you still don't want things
# coming out as np.str_!)

dtype = np.object_
dtype = np.dtype(object)

elif isinstance(val, (np.datetime64, datetime)):
val = tslibs.Timestamp(val)
Expand All @@ -671,30 +671,30 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
else:
# return datetimetz as object
return np.object_, val
return np.dtype(object), val
val = val.value

elif isinstance(val, (np.timedelta64, timedelta)):
val = tslibs.Timedelta(val).value
dtype = np.dtype("m8[ns]")

elif is_bool(val):
dtype = np.bool_
dtype = np.dtype(np.bool_)

elif is_integer(val):
if isinstance(val, np.integer):
dtype = type(val)
dtype = np.dtype(type(val))
else:
dtype = np.int64
dtype = np.dtype(np.int64)

elif is_float(val):
if isinstance(val, np.floating):
dtype = type(val)
dtype = np.dtype(type(val))
else:
dtype = np.float64
dtype = np.dtype(np.float64)

elif is_complex(val):
dtype = np.complex_
dtype = np.dtype(np.complex_)

elif pandas_dtype:
if lib.is_period(val):
Expand All @@ -707,7 +707,8 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
return dtype, val


def infer_dtype_from_array(arr, pandas_dtype: bool = False):
# TODO: try to make the Any in the return annotation more specific
def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
"""
Infer the dtype from an array.

Expand Down Expand Up @@ -738,7 +739,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
array(['1', '1'], dtype='<U21')

>>> infer_dtype_from_array([1, '1'])
(<class 'numpy.object_'>, [1, '1'])
(dtype('O'), [1, '1'])
"""
if isinstance(arr, np.ndarray):
return arr.dtype, arr
Expand All @@ -755,7 +756,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
# don't force numpy coerce with nan's
inferred = lib.infer_dtype(arr, skipna=False)
if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
return (np.object_, arr)
return (np.dtype(np.object_), arr)

arr = np.asarray(arr)
return arr.dtype, arr
Expand Down Expand Up @@ -1469,7 +1470,7 @@ def find_common_type(types):
return np.find_common_type(types, [])


def cast_scalar_to_array(shape, value, dtype=None):
def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.ndarray:
"""
Create np.ndarray of specified shape and dtype, filled with values.

Expand All @@ -1496,7 +1497,9 @@ def cast_scalar_to_array(shape, value, dtype=None):
return values


def construct_1d_arraylike_from_scalar(value, length: int, dtype):
def construct_1d_arraylike_from_scalar(
value, length: int, dtype: DtypeObj
) -> ArrayLike:
"""
create a np.ndarray / pandas type of specified shape and dtype
filled with values
Expand All @@ -1505,7 +1508,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
----------
value : scalar value
length : int
dtype : pandas_dtype / np.dtype
dtype : pandas_dtype or np.dtype

Returns
-------
Expand All @@ -1517,8 +1520,6 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
subarr = cls._from_sequence([value] * length, dtype=dtype)

else:
if not isinstance(dtype, (np.dtype, type(np.dtype))):
dtype = dtype.dtype

if length and is_integer_dtype(dtype) and isna(value):
# coerce if we have nan for an integer dtype
Expand All @@ -1536,7 +1537,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
return subarr


def construct_1d_object_array_from_listlike(values):
def construct_1d_object_array_from_listlike(values) -> np.ndarray:
"""
Transform any list-like object into a 1-dimensional numpy array of object
dtype.
Expand All @@ -1561,7 +1562,9 @@ def construct_1d_object_array_from_listlike(values):
return result


def construct_1d_ndarray_preserving_na(values, dtype=None, copy: bool = False):
def construct_1d_ndarray_preserving_na(
values, dtype: Optional[DtypeObj] = None, copy: bool = False
) -> np.ndarray:
"""
Construct a new ndarray, coercing `values` to `dtype`, preserving NA.

Expand Down
11 changes: 10 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
is_scalar,
is_sequence,
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -1917,7 +1918,12 @@ def to_records(

@classmethod
def _from_arrays(
cls, arrays, columns, index, dtype=None, verify_integrity=True
cls,
arrays,
columns,
index,
dtype: Optional[Dtype] = None,
verify_integrity: bool = True,
) -> "DataFrame":
"""
Create DataFrame from a list of arrays corresponding to the columns.
Expand All @@ -1943,6 +1949,9 @@ def _from_arrays(
-------
DataFrame
"""
if dtype is not None:
dtype = pandas_dtype(dtype)

mgr = arrays_to_mgr(
arrays,
columns,
Expand Down
Loading