Skip to content

CLN: avoid infer_dtype #53063

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays.base import ExtensionArray
-from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.construction import (
+    array as pd_array,
+    ensure_wrapped_if_datetimelike,
+    extract_array,
+)
from pandas.core.indexers import check_array_indexer
from pandas.core.ops import invalid_comparison

Expand Down Expand Up @@ -645,20 +649,8 @@ def _arith_method(self, other, op):
and len(other) == len(self)
):
# Try inferring masked dtype instead of casting to object
-inferred_dtype = lib.infer_dtype(other, skipna=True)
-if inferred_dtype == "integer":
-    from pandas.core.arrays import IntegerArray
-
-    other = IntegerArray._from_sequence(other)
-elif inferred_dtype in ["floating", "mixed-integer-float"]:
-    from pandas.core.arrays import FloatingArray
-
-    other = FloatingArray._from_sequence(other)
-
-elif inferred_dtype in ["boolean"]:
-    from pandas.core.arrays import BooleanArray
-
-    other = BooleanArray._from_sequence(other)
+other = pd_array(other)
+other = extract_array(other, extract_numpy=True)

if isinstance(other, BaseMaskedArray):
other, omask = other._data, other._mask
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1654,9 +1654,8 @@ def is_all_strings(value: ArrayLike) -> bool:
dtype = value.dtype

if isinstance(dtype, np.dtype):
-return (
-    dtype == np.dtype("object")
-    and lib.infer_dtype(value, skipna=False) == "string"
+return dtype == np.dtype("object") and lib.is_string_array(
+    np.asarray(value), skipna=False
)
elif isinstance(dtype, CategoricalDtype):
return dtype.categories.inferred_type == "string"
Expand Down
18 changes: 4 additions & 14 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
-is_bool_dtype,
-is_float_dtype,
is_integer_dtype,
is_list_like,
is_named_tuple,
Expand All @@ -44,14 +42,10 @@
algorithms,
common as com,
)
-from pandas.core.arrays import (
-    BooleanArray,
-    ExtensionArray,
-    FloatingArray,
-    IntegerArray,
-)
+from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.string_ import StringDtype
from pandas.core.construction import (
+array as pd_array,
ensure_wrapped_if_datetimelike,
extract_array,
range_to_ndarray,
Expand Down Expand Up @@ -1027,12 +1021,8 @@ def convert(arr):
if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
arr = StringDtype().construct_array_type()._from_sequence(arr)
elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
-if is_integer_dtype(arr.dtype):
-    arr = IntegerArray(arr, np.zeros(arr.shape, dtype=np.bool_))
-elif is_bool_dtype(arr.dtype):
-    arr = BooleanArray(arr, np.zeros(arr.shape, dtype=np.bool_))
-elif is_float_dtype(arr.dtype):
-    arr = FloatingArray(arr, np.isnan(arr))
+if arr.dtype.kind in "iufb":
+    arr = pd_array(arr, copy=False)

elif isinstance(dtype, ExtensionDtype):
# TODO: test(s) that get here
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,8 +757,7 @@ def _infer_types(
result = BooleanArray(result, bool_mask)
elif result.dtype == np.object_ and non_default_dtype_backend:
# read_excel sends array of datetime objects
-inferred_type = lib.infer_dtype(result)
-if inferred_type != "datetime":
+if not lib.is_datetime_array(result, skipna=True):
result = StringDtype().construct_array_type()._from_sequence(values)

if dtype_backend == "pyarrow":
Expand Down