Skip to content

Commit 1a82659

Browse files
authored
TYP: construction (#33725)
1 parent 428791c commit 1a82659

File tree

5 files changed: +92 -61 lines changed

pandas/core/arrays/categorical.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@ def __init__(
330330
values = _convert_to_list_like(values)
331331

332332
# By convention, empty lists result in object dtype:
333-
sanitize_dtype = "object" if len(values) == 0 else None
333+
sanitize_dtype = np.dtype("O") if len(values) == 0 else None
334334
null_mask = isna(values)
335335
if null_mask.any():
336336
values = [values[idx] for idx in np.where(~null_mask)[0]]

pandas/core/construction.py

+13 -16
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313

1414
from pandas._libs import lib
1515
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
16-
from pandas._typing import ArrayLike, Dtype
16+
from pandas._typing import ArrayLike, Dtype, DtypeObj
1717

1818
from pandas.core.dtypes.cast import (
1919
construct_1d_arraylike_from_scalar,
@@ -36,7 +36,6 @@
3636
is_list_like,
3737
is_object_dtype,
3838
is_timedelta64_ns_dtype,
39-
pandas_dtype,
4039
)
4140
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry
4241
from pandas.core.dtypes.generic import (
@@ -52,13 +51,12 @@
5251
if TYPE_CHECKING:
5352
from pandas.core.series import Series # noqa: F401
5453
from pandas.core.indexes.api import Index # noqa: F401
54+
from pandas.core.arrays import ExtensionArray # noqa: F401
5555

5656

5757
def array(
58-
data: Sequence[object],
59-
dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
60-
copy: bool = True,
61-
) -> ABCExtensionArray:
58+
data: Sequence[object], dtype: Optional[Dtype] = None, copy: bool = True,
59+
) -> "ExtensionArray":
6260
"""
6361
Create an array.
6462
@@ -388,14 +386,16 @@ def extract_array(obj, extract_numpy: bool = False):
388386

389387

390388
def sanitize_array(
391-
data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
392-
):
389+
data,
390+
index: Optional["Index"],
391+
dtype: Optional[DtypeObj] = None,
392+
copy: bool = False,
393+
raise_cast_failure: bool = False,
394+
) -> ArrayLike:
393395
"""
394-
Sanitize input data to an ndarray, copy if specified, coerce to the
395-
dtype if specified.
396+
Sanitize input data to an ndarray or ExtensionArray, copy if specified,
397+
coerce to the dtype if specified.
396398
"""
397-
if dtype is not None:
398-
dtype = pandas_dtype(dtype)
399399

400400
if isinstance(data, ma.MaskedArray):
401401
mask = ma.getmaskarray(data)
@@ -508,10 +508,7 @@ def sanitize_array(
508508

509509

510510
def _try_cast(
511-
arr,
512-
dtype: Optional[Union[np.dtype, "ExtensionDtype"]],
513-
copy: bool,
514-
raise_cast_failure: bool,
511+
arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool,
515512
):
516513
"""
517514
Convert input to numpy ndarray and optionally cast to a given dtype.

pandas/core/dtypes/cast.py

+26 -23
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
"""
44

55
from datetime import date, datetime, timedelta
6-
from typing import TYPE_CHECKING, Type
6+
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type
77

88
import numpy as np
99

@@ -17,7 +17,7 @@
1717
iNaT,
1818
)
1919
from pandas._libs.tslibs.timezones import tz_compare
20-
from pandas._typing import Dtype, DtypeObj
20+
from pandas._typing import ArrayLike, Dtype, DtypeObj
2121
from pandas.util._validators import validate_bool_kwarg
2222

2323
from pandas.core.dtypes.common import (
@@ -613,7 +613,7 @@ def _ensure_dtype_type(value, dtype):
613613
return dtype.type(value)
614614

615615

616-
def infer_dtype_from(val, pandas_dtype: bool = False):
616+
def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
617617
"""
618618
Interpret the dtype from a scalar or array.
619619
@@ -630,7 +630,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False):
630630
return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
631631

632632

633-
def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
633+
def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
634634
"""
635635
Interpret the dtype from a scalar.
636636
@@ -641,7 +641,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
641641
If False, scalar belongs to pandas extension types is inferred as
642642
object
643643
"""
644-
dtype = np.object_
644+
dtype = np.dtype(object)
645645

646646
# a 1-element ndarray
647647
if isinstance(val, np.ndarray):
@@ -660,7 +660,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
660660
# instead of np.empty (but then you still don't want things
661661
# coming out as np.str_!
662662

663-
dtype = np.object_
663+
dtype = np.dtype(object)
664664

665665
elif isinstance(val, (np.datetime64, datetime)):
666666
val = tslibs.Timestamp(val)
@@ -671,30 +671,30 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
671671
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
672672
else:
673673
# return datetimetz as object
674-
return np.object_, val
674+
return np.dtype(object), val
675675
val = val.value
676676

677677
elif isinstance(val, (np.timedelta64, timedelta)):
678678
val = tslibs.Timedelta(val).value
679679
dtype = np.dtype("m8[ns]")
680680

681681
elif is_bool(val):
682-
dtype = np.bool_
682+
dtype = np.dtype(np.bool_)
683683

684684
elif is_integer(val):
685685
if isinstance(val, np.integer):
686-
dtype = type(val)
686+
dtype = np.dtype(type(val))
687687
else:
688-
dtype = np.int64
688+
dtype = np.dtype(np.int64)
689689

690690
elif is_float(val):
691691
if isinstance(val, np.floating):
692-
dtype = type(val)
692+
dtype = np.dtype(type(val))
693693
else:
694-
dtype = np.float64
694+
dtype = np.dtype(np.float64)
695695

696696
elif is_complex(val):
697-
dtype = np.complex_
697+
dtype = np.dtype(np.complex_)
698698

699699
elif pandas_dtype:
700700
if lib.is_period(val):
@@ -707,7 +707,8 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
707707
return dtype, val
708708

709709

710-
def infer_dtype_from_array(arr, pandas_dtype: bool = False):
710+
# TODO: try to make the Any in the return annotation more specific
711+
def infer_dtype_from_array(arr, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]:
711712
"""
712713
Infer the dtype from an array.
713714
@@ -738,7 +739,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
738739
array(['1', '1'], dtype='<U21')
739740
740741
>>> infer_dtype_from_array([1, '1'])
741-
(<class 'numpy.object_'>, [1, '1'])
742+
(dtype('O'), [1, '1'])
742743
"""
743744
if isinstance(arr, np.ndarray):
744745
return arr.dtype, arr
@@ -755,7 +756,7 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False):
755756
# don't force numpy coerce with nan's
756757
inferred = lib.infer_dtype(arr, skipna=False)
757758
if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
758-
return (np.object_, arr)
759+
return (np.dtype(np.object_), arr)
759760

760761
arr = np.asarray(arr)
761762
return arr.dtype, arr
@@ -1469,7 +1470,7 @@ def find_common_type(types):
14691470
return np.find_common_type(types, [])
14701471

14711472

1472-
def cast_scalar_to_array(shape, value, dtype=None):
1473+
def cast_scalar_to_array(shape, value, dtype: Optional[DtypeObj] = None) -> np.ndarray:
14731474
"""
14741475
Create np.ndarray of specified shape and dtype, filled with values.
14751476
@@ -1496,7 +1497,9 @@ def cast_scalar_to_array(shape, value, dtype=None):
14961497
return values
14971498

14981499

1499-
def construct_1d_arraylike_from_scalar(value, length: int, dtype):
1500+
def construct_1d_arraylike_from_scalar(
1501+
value, length: int, dtype: DtypeObj
1502+
) -> ArrayLike:
15001503
"""
15011504
create a np.ndarray / pandas type of specified shape and dtype
15021505
filled with values
@@ -1505,7 +1508,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
15051508
----------
15061509
value : scalar value
15071510
length : int
1508-
dtype : pandas_dtype / np.dtype
1511+
dtype : pandas_dtype or np.dtype
15091512
15101513
Returns
15111514
-------
@@ -1517,8 +1520,6 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
15171520
subarr = cls._from_sequence([value] * length, dtype=dtype)
15181521

15191522
else:
1520-
if not isinstance(dtype, (np.dtype, type(np.dtype))):
1521-
dtype = dtype.dtype
15221523

15231524
if length and is_integer_dtype(dtype) and isna(value):
15241525
# coerce if we have nan for an integer dtype
@@ -1536,7 +1537,7 @@ def construct_1d_arraylike_from_scalar(value, length: int, dtype):
15361537
return subarr
15371538

15381539

1539-
def construct_1d_object_array_from_listlike(values):
1540+
def construct_1d_object_array_from_listlike(values) -> np.ndarray:
15401541
"""
15411542
Transform any list-like object in a 1-dimensional numpy array of object
15421543
dtype.
@@ -1561,7 +1562,9 @@ def construct_1d_object_array_from_listlike(values):
15611562
return result
15621563

15631564

1564-
def construct_1d_ndarray_preserving_na(values, dtype=None, copy: bool = False):
1565+
def construct_1d_ndarray_preserving_na(
1566+
values, dtype: Optional[DtypeObj] = None, copy: bool = False
1567+
) -> np.ndarray:
15651568
"""
15661569
Construct a new ndarray, coercing `values` to `dtype`, preserving NA.
15671570

pandas/core/frame.py

+10 -1
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,7 @@
104104
is_scalar,
105105
is_sequence,
106106
needs_i8_conversion,
107+
pandas_dtype,
107108
)
108109
from pandas.core.dtypes.generic import (
109110
ABCDataFrame,
@@ -1917,7 +1918,12 @@ def to_records(
19171918

19181919
@classmethod
19191920
def _from_arrays(
1920-
cls, arrays, columns, index, dtype=None, verify_integrity=True
1921+
cls,
1922+
arrays,
1923+
columns,
1924+
index,
1925+
dtype: Optional[Dtype] = None,
1926+
verify_integrity: bool = True,
19211927
) -> "DataFrame":
19221928
"""
19231929
Create DataFrame from a list of arrays corresponding to the columns.
@@ -1943,6 +1949,9 @@ def _from_arrays(
19431949
-------
19441950
DataFrame
19451951
"""
1952+
if dtype is not None:
1953+
dtype = pandas_dtype(dtype)
1954+
19461955
mgr = arrays_to_mgr(
19471956
arrays,
19481957
columns,

0 commit comments

Comments
 (0)