Skip to content

CLN: remove pandas_dtype kwd from infer_dtype_from_x #53064

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
other : Any
"""
if values.dtype == object:
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
dtype, _ = infer_dtype_from(other)

if isinstance(dtype, np.dtype) and dtype.kind in "mM":
# https://github.com/numpy/numpy/issues/12550
Expand Down
62 changes: 18 additions & 44 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):

# returns tuple of (dtype, fill_value)
if issubclass(dtype.type, np.datetime64):
inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
inferred, fv = infer_dtype_from_scalar(fill_value)
if inferred == dtype:
return dtype, fv

Expand All @@ -645,7 +645,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
return _dtype_obj, fill_value

elif issubclass(dtype.type, np.timedelta64):
inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
inferred, fv = infer_dtype_from_scalar(fill_value)
if inferred == dtype:
return dtype, fv

Expand Down Expand Up @@ -735,33 +735,26 @@ def _ensure_dtype_type(value, dtype: np.dtype):
return dtype.type(value)


def infer_dtype_from(val) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar or array.

    Parameters
    ----------
    val : object
        Scalar or list-like whose dtype should be inferred.

    Returns
    -------
    tuple[DtypeObj, Any]
        The inferred dtype and the (possibly converted) value/array.
    """
    # Dispatch: scalars go through infer_dtype_from_scalar, list-likes
    # through infer_dtype_from_array.
    if not is_list_like(val):
        return infer_dtype_from_scalar(val)
    return infer_dtype_from_array(val)


def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
"""
Interpret the dtype from a scalar.

Parameters
----------
pandas_dtype : bool, default False
whether to infer dtype including pandas extension types.
If False, scalar belongs to pandas extension types is inferred as
object
val : object
"""
dtype: DtypeObj = _dtype_obj

Expand Down Expand Up @@ -796,11 +789,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
dtype = val.dtype
# TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
else:
if pandas_dtype:
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
else:
# return datetimetz as object
return _dtype_obj, val
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)

elif isinstance(val, (np.timedelta64, dt.timedelta)):
try:
Expand Down Expand Up @@ -834,12 +823,11 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
elif is_complex(val):
dtype = np.dtype(np.complex_)

elif pandas_dtype:
if lib.is_period(val):
dtype = PeriodDtype(freq=val.freq)
elif lib.is_interval(val):
subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
dtype = IntervalDtype(subtype=subtype, closed=val.closed)
if lib.is_period(val):
dtype = PeriodDtype(freq=val.freq)
elif lib.is_interval(val):
subtype = infer_dtype_from_scalar(val.left)[0]
dtype = IntervalDtype(subtype=subtype, closed=val.closed)

return dtype, val

Expand All @@ -859,32 +847,18 @@ def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
return {maybe_box_datetimelike(key): value for key, value in d.items()}


def infer_dtype_from_array(
arr, pandas_dtype: bool = False
) -> tuple[DtypeObj, ArrayLike]:
def infer_dtype_from_array(arr) -> tuple[DtypeObj, ArrayLike]:
"""
Infer the dtype from an array.

Parameters
----------
arr : array
pandas_dtype : bool, default False
whether to infer dtype including pandas extension types.
If False, array belongs to pandas extension types
is inferred as object

Returns
-------
tuple (numpy-compat/pandas-compat dtype, array)

Notes
-----
if pandas_dtype=False. these infer to numpy dtypes
exactly with the exception that mixed / object dtypes
are not coerced by stringifying or conversion
tuple (pandas-compat dtype, array)

if pandas_dtype=True. datetime64tz-aware/categorical
types will retain there character.

Examples
--------
Expand All @@ -901,7 +875,7 @@ def infer_dtype_from_array(
raise TypeError("'arr' must be list-like")

arr_dtype = getattr(arr, "dtype", None)
if pandas_dtype and isinstance(arr_dtype, ExtensionDtype):
if isinstance(arr_dtype, ExtensionDtype):
return arr.dtype, arr

elif isinstance(arr, ABCSeries):
Expand Down Expand Up @@ -1303,7 +1277,7 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
new_dtype = ensure_dtype_can_hold_na(left.dtype)

else:
dtype, _ = infer_dtype_from(right, pandas_dtype=True)
dtype, _ = infer_dtype_from(right)

new_dtype = find_common_type([left.dtype, dtype])

Expand Down Expand Up @@ -1466,7 +1440,7 @@ def construct_1d_arraylike_from_scalar(

if dtype is None:
try:
dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True)
dtype, value = infer_dtype_from_scalar(value)
except OutOfBoundsDatetime:
dtype = _dtype_obj

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ def __init__(
columns = ensure_index(columns)

if not dtype:
dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)
dtype, _ = infer_dtype_from_scalar(data)

# For data is a scalar extension dtype
if isinstance(dtype, ExtensionDtype):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6088,7 +6088,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
Implementation of find_common_type that adjusts for Index-specific
special cases.
"""
target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
target_dtype, _ = infer_dtype_from(target)

# special case: if one dtype is uint64 and the other a signed int, return object
# See https://github.com/pandas-dev/pandas/issues/26778 for discussion
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def _maybe_convert_i8(self, key):

if scalar:
# Timestamp/Timedelta
key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
key_dtype, key_i8 = infer_dtype_from_scalar(key)
if lib.is_period(key):
key_i8 = key.ordinal
elif isinstance(key_i8, Timestamp):
Expand Down
10 changes: 3 additions & 7 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
new_block,
to_native_types,
)
from pandas.core.internals.managers import make_na_array

if TYPE_CHECKING:
from pandas._typing import (
Expand Down Expand Up @@ -665,13 +666,8 @@ def _make_na_array(self, fill_value=None, use_na_proxy: bool = False):
fill_value = np.nan

dtype, fill_value = infer_dtype_from_scalar(fill_value)
# error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any],
# ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
# Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
# _DTypeDict, Tuple[Any, Any]]]"
values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type]
values.fill(fill_value)
return values
array_values = make_na_array(dtype, self.shape_proper[:1], fill_value)
return array_values

def _equal_values(self, other) -> bool:
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,7 +921,7 @@ def _make_na_block(

shape = (len(placement), self.shape[1])

dtype, fill_value = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
dtype, fill_value = infer_dtype_from_scalar(fill_value)
block_values = make_na_array(dtype, shape, fill_value)
return new_block_2d(block_values, placement=placement)

Expand Down
13 changes: 8 additions & 5 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,14 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
# known to be holdable by arr.
# When called from Series._single_replace, values_to_mask is tuple or list
dtype, values_to_mask = infer_dtype_from(values_to_mask)
# error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any],
# ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
# Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
# _DTypeDict, Tuple[Any, Any]]]"
values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type]

if isinstance(dtype, np.dtype):
values_to_mask = np.array(values_to_mask, dtype=dtype)
else:
cls = dtype.construct_array_type()
if not lib.is_list_like(values_to_mask):
values_to_mask = [values_to_mask]
values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)

potential_na = False
if is_object_dtype(arr.dtype):
Expand Down
92 changes: 39 additions & 53 deletions pandas/tests/dtypes/cast/test_infer_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@
)


@pytest.fixture(params=[True, False])
def pandas_dtype(request):
    """Fixture parametrizing tests over ``pandas_dtype=True`` and ``False``."""
    return request.param


def test_infer_dtype_from_int_scalar(any_int_numpy_dtype):
# Test that infer_dtype_from_scalar is
# returning correct dtype for int and float.
Expand Down Expand Up @@ -81,36 +76,32 @@ def test_infer_dtype_from_timedelta(data):


@pytest.mark.parametrize("freq", ["M", "D"])
def test_infer_dtype_from_period(freq):
    # Period scalars infer to their PeriodDtype extension dtype,
    # preserving the frequency.
    p = Period("2011-01-01", freq=freq)
    dtype, val = infer_dtype_from_scalar(p)

    exp_dtype = f"period[{freq}]"

    assert dtype == exp_dtype
    assert val == p


@pytest.mark.parametrize(
"data", [date(2000, 1, 1), "foo", Timestamp(1, tz="US/Eastern")]
)
def test_infer_dtype_misc(data):
dtype, val = infer_dtype_from_scalar(data)
def test_infer_dtype_misc():
dt = date(2000, 1, 1)
dtype, val = infer_dtype_from_scalar(dt)
assert dtype == np.object_

ts = Timestamp(1, tz="US/Eastern")
dtype, val = infer_dtype_from_scalar(ts)
assert dtype == "datetime64[ns, US/Eastern]"


@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"])
def test_infer_from_scalar_tz(tz):
    # A tz-aware Timestamp infers to DatetimeTZDtype, preserving the tz.
    dt = Timestamp(1, tz=tz)
    dtype, val = infer_dtype_from_scalar(dt)

    exp_dtype = f"datetime64[ns, {tz}]"

    assert dtype == exp_dtype
    assert val == dt
Expand All @@ -126,11 +117,11 @@ def test_infer_from_scalar_tz(tz, pandas_dtype):
(Timedelta(0), Timedelta(1), "timedelta64[ns]"),
],
)
def test_infer_from_interval(left, right, subtype, closed, pandas_dtype):
def test_infer_from_interval(left, right, subtype, closed):
# GH 30337
interval = Interval(left, right, closed)
result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype)
expected_dtype = f"interval[{subtype}, {closed}]" if pandas_dtype else np.object_
result_dtype, result_value = infer_dtype_from_scalar(interval)
expected_dtype = f"interval[{subtype}, {closed}]"
assert result_dtype == expected_dtype
assert result_value == interval

Expand All @@ -143,54 +134,49 @@ def test_infer_dtype_from_scalar_errors():


@pytest.mark.parametrize(
    "value, expected",
    [
        ("foo", np.object_),
        (b"foo", np.object_),
        (1, np.int64),
        (1.5, np.float_),
        (np.datetime64("2016-01-01"), np.dtype("M8[ns]")),
        (Timestamp("20160101"), np.dtype("M8[ns]")),
        (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]"),
    ],
)
def test_infer_dtype_from_scalar(value, expected):
    # Scalars infer to numpy dtypes, except tz-aware Timestamps which
    # infer to the DatetimeTZDtype extension dtype.
    dtype, _ = infer_dtype_from_scalar(value)
    assert is_dtype_equal(dtype, expected)

    # Scalars must be rejected by the array-inference path.
    with pytest.raises(TypeError, match="must be list-like"):
        infer_dtype_from_array(value)


@pytest.mark.parametrize(
    "arr, expected",
    [
        ([1], np.int_),
        (np.array([1], dtype=np.int64), np.int64),
        ([np.nan, 1, ""], np.object_),
        (np.array([[1.0, 2.0]]), np.float_),
        (Categorical(list("aabc")), "category"),
        (Categorical([1, 2, 3]), "category"),
        (date_range("20160101", periods=3), np.dtype("=M8[ns]")),
        (
            date_range("20160101", periods=3, tz="US/Eastern"),
            "datetime64[ns, US/Eastern]",
        ),
        (Series([1.0, 2, 3]), np.float64),
        (Series(list("abc")), np.object_),
        (
            Series(date_range("20160101", periods=3, tz="US/Eastern")),
            "datetime64[ns, US/Eastern]",
        ),
    ],
)
def test_infer_dtype_from_array(arr, expected):
    # Extension-dtype arrays (Categorical, tz-aware datetimes) retain
    # their extension dtype; plain sequences infer to numpy dtypes.
    dtype, _ = infer_dtype_from_array(arr)
    assert is_dtype_equal(dtype, expected)


Expand Down