Skip to content

Commit 42fb95b

Browse files
jbrockmendel authored and quintusdias committed
CLN: simplify maybe_convert_objects, soft_convert_objects (pandas-dev#27444)
1 parent dc12757 commit 42fb95b

File tree

6 files changed

+77
-97
lines changed

6 files changed

+77
-97
lines changed

pandas/core/dtypes/cast.py

+41 -56
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
from pandas._libs import lib, tslib, tslibs
88
from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT
9+
from pandas.util._validators import validate_bool_kwarg
910

1011
from .common import (
1112
_INT64_DTYPE,
@@ -696,9 +697,7 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):
696697
elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
697698

698699
if not np.isfinite(arr).all():
699-
raise ValueError(
700-
"Cannot convert non-finite values (NA or inf) to " "integer"
701-
)
700+
raise ValueError("Cannot convert non-finite values (NA or inf) to integer")
702701

703702
elif is_object_dtype(arr):
704703

@@ -719,9 +718,7 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):
719718
return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)
720719

721720
if dtype.name in ("datetime64", "timedelta64"):
722-
msg = (
723-
"The '{dtype}' dtype has no unit. " "Please pass in '{dtype}[ns]' instead."
724-
)
721+
msg = "The '{dtype}' dtype has no unit. Please pass in '{dtype}[ns]' instead."
725722
raise ValueError(msg.format(dtype=dtype.name))
726723

727724
if copy or is_object_dtype(arr) or is_object_dtype(dtype):
@@ -731,50 +728,33 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):
731728
return arr.view(dtype)
732729

733730

734-
def maybe_convert_objects(
735-
values, convert_dates=True, convert_numeric=True, convert_timedeltas=True, copy=True
736-
):
737-
""" if we have an object dtype, try to coerce dates and/or numbers """
738-
739-
# if we have passed in a list or scalar
740-
if isinstance(values, (list, tuple)):
741-
values = np.array(values, dtype=np.object_)
742-
if not hasattr(values, "dtype"):
743-
values = np.array([values], dtype=np.object_)
731+
def maybe_convert_objects(values: np.ndarray, convert_numeric: bool = True):
732+
"""
733+
If we have an object dtype array, try to coerce dates and/or numbers.
744734
745-
# convert dates
746-
if convert_dates and values.dtype == np.object_:
735+
Parameters
736+
----------
737+
values : ndarray
738+
convert_numeric : bool, default True
747739
748-
# we take an aggressive stance and convert to datetime64[ns]
749-
if convert_dates == "coerce":
750-
new_values = maybe_cast_to_datetime(values, "M8[ns]", errors="coerce")
740+
Returns
741+
-------
742+
ndarray or DatetimeIndex
743+
"""
744+
validate_bool_kwarg(convert_numeric, "convert_numeric")
751745

752-
# if we are all nans then leave me alone
753-
if not isna(new_values).all():
754-
values = new_values
746+
orig_values = values
755747

756-
else:
757-
values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)
748+
# convert dates
749+
if is_object_dtype(values.dtype):
750+
values = lib.maybe_convert_objects(values, convert_datetime=True)
758751

759752
# convert timedeltas
760-
if convert_timedeltas and values.dtype == np.object_:
761-
762-
if convert_timedeltas == "coerce":
763-
from pandas.core.tools.timedeltas import to_timedelta
764-
765-
new_values = to_timedelta(values, errors="coerce")
766-
767-
# if we are all nans then leave me alone
768-
if not isna(new_values).all():
769-
values = new_values
770-
771-
else:
772-
values = lib.maybe_convert_objects(
773-
values, convert_timedelta=convert_timedeltas
774-
)
753+
if is_object_dtype(values.dtype):
754+
values = lib.maybe_convert_objects(values, convert_timedelta=True)
775755

776756
# convert to numeric
777-
if values.dtype == np.object_:
757+
if is_object_dtype(values.dtype):
778758
if convert_numeric:
779759
try:
780760
new_values = lib.maybe_convert_numeric(
@@ -791,33 +771,38 @@ def maybe_convert_objects(
791771
# soft-conversion
792772
values = lib.maybe_convert_objects(values)
793773

794-
values = values.copy() if copy else values
774+
if values is orig_values:
775+
values = values.copy()
795776

796777
return values
797778

798779

799780
def soft_convert_objects(
800-
values, datetime=True, numeric=True, timedelta=True, coerce=False, copy=True
781+
values: np.ndarray,
782+
datetime: bool = True,
783+
numeric: bool = True,
784+
timedelta: bool = True,
785+
coerce: bool = False,
786+
copy: bool = True,
801787
):
802788
""" if we have an object dtype, try to coerce dates and/or numbers """
803789

790+
validate_bool_kwarg(datetime, "datetime")
791+
validate_bool_kwarg(numeric, "numeric")
792+
validate_bool_kwarg(timedelta, "timedelta")
793+
validate_bool_kwarg(coerce, "coerce")
794+
validate_bool_kwarg(copy, "copy")
795+
804796
conversion_count = sum((datetime, numeric, timedelta))
805797
if conversion_count == 0:
806-
raise ValueError(
807-
"At least one of datetime, numeric or timedelta must " "be True."
808-
)
798+
raise ValueError("At least one of datetime, numeric or timedelta must be True.")
809799
elif conversion_count > 1 and coerce:
810800
raise ValueError(
811801
"Only one of 'datetime', 'numeric' or "
812802
"'timedelta' can be True when when coerce=True."
813803
)
814804

815-
if isinstance(values, (list, tuple)):
816-
# List or scalar
817-
values = np.array(values, dtype=np.object_)
818-
elif not hasattr(values, "dtype"):
819-
values = np.array([values], dtype=np.object_)
820-
elif not is_object_dtype(values.dtype):
805+
if not is_object_dtype(values.dtype):
821806
# If not object, do not attempt conversion
822807
values = values.copy() if copy else values
823808
return values
@@ -843,13 +828,13 @@ def soft_convert_objects(
843828
# GH 20380, when datetime is beyond year 2262, hence outside
844829
# bound of nanosecond-resolution 64-bit integers.
845830
try:
846-
values = lib.maybe_convert_objects(values, convert_datetime=datetime)
831+
values = lib.maybe_convert_objects(values, convert_datetime=True)
847832
except OutOfBoundsDatetime:
848833
pass
849834

850835
if timedelta and is_object_dtype(values.dtype):
851836
# Object check to ensure only run if previous did not convert
852-
values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)
837+
values = lib.maybe_convert_objects(values, convert_timedelta=True)
853838

854839
if numeric and is_object_dtype(values.dtype):
855840
try:
@@ -1368,7 +1353,7 @@ def maybe_cast_to_integer_array(arr, dtype, copy=False):
13681353
arr = np.asarray(arr)
13691354

13701355
if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
1371-
raise OverflowError("Trying to coerce negative values " "to unsigned integers")
1356+
raise OverflowError("Trying to coerce negative values to unsigned integers")
13721357

13731358
if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
13741359
raise ValueError("Trying to coerce float values to integers")

pandas/core/generic.py

+5
Original file line number | Diff line number | Diff line change
@@ -6033,6 +6033,11 @@ def _convert(
60336033
-------
60346034
converted : same as input object
60356035
"""
6036+
validate_bool_kwarg(datetime, "datetime")
6037+
validate_bool_kwarg(numeric, "numeric")
6038+
validate_bool_kwarg(timedelta, "timedelta")
6039+
validate_bool_kwarg(coerce, "coerce")
6040+
validate_bool_kwarg(copy, "copy")
60366041
return self._constructor(
60376042
self._data.convert(
60386043
datetime=datetime,

pandas/core/internals/blocks.py

+28 -32
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
find_common_type,
1919
infer_dtype_from,
2020
infer_dtype_from_scalar,
21-
maybe_convert_objects,
2221
maybe_downcast_to_dtype,
2322
maybe_infer_dtype_type,
2423
maybe_promote,
@@ -669,7 +668,14 @@ def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
669668
)
670669
return newb
671670

672-
def convert(self, copy=True, **kwargs):
671+
def convert(
672+
self,
673+
copy: bool = True,
674+
datetime: bool = True,
675+
numeric: bool = True,
676+
timedelta: bool = True,
677+
coerce: bool = False,
678+
):
673679
""" attempt to coerce any object types to better types return a copy
674680
of the block (if copy = True) by definition we are not an ObjectBlock
675681
here!
@@ -827,9 +833,7 @@ def replace(
827833
convert=convert,
828834
)
829835
if convert:
830-
blocks = [
831-
b.convert(by_item=True, numeric=False, copy=not inplace) for b in blocks
832-
]
836+
blocks = [b.convert(numeric=False, copy=not inplace) for b in blocks]
833837
return blocks
834838

835839
def _replace_single(self, *args, **kwargs):
@@ -2779,45 +2783,39 @@ def is_bool(self):
27792783
"""
27802784
return lib.is_bool_array(self.values.ravel())
27812785

2782-
# TODO: Refactor when convert_objects is removed since there will be 1 path
2783-
def convert(self, *args, **kwargs):
2786+
def convert(
2787+
self,
2788+
copy: bool = True,
2789+
datetime: bool = True,
2790+
numeric: bool = True,
2791+
timedelta: bool = True,
2792+
coerce: bool = False,
2793+
):
27842794
""" attempt to coerce any object types to better types return a copy of
27852795
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
27862796
27872797
can return multiple blocks!
27882798
"""
27892799

2790-
if args:
2791-
raise NotImplementedError
2792-
by_item = kwargs.get("by_item", True)
2793-
2794-
new_inputs = ["coerce", "datetime", "numeric", "timedelta"]
2795-
new_style = False
2796-
for kw in new_inputs:
2797-
new_style |= kw in kwargs
2798-
2799-
if new_style:
2800-
fn = soft_convert_objects
2801-
fn_inputs = new_inputs
2802-
else:
2803-
fn = maybe_convert_objects
2804-
fn_inputs = ["convert_dates", "convert_numeric", "convert_timedeltas"]
2805-
fn_inputs += ["copy"]
2806-
2807-
fn_kwargs = {key: kwargs[key] for key in fn_inputs if key in kwargs}
2808-
28092800
# operate column-by-column
28102801
def f(m, v, i):
28112802
shape = v.shape
2812-
values = fn(v.ravel(), **fn_kwargs)
2803+
values = soft_convert_objects(
2804+
v.ravel(),
2805+
datetime=datetime,
2806+
numeric=numeric,
2807+
timedelta=timedelta,
2808+
coerce=coerce,
2809+
copy=copy,
2810+
)
28132811
if isinstance(values, np.ndarray):
28142812
# TODO: allow EA once reshape is supported
28152813
values = values.reshape(shape)
28162814

28172815
values = _block_shape(values, ndim=self.ndim)
28182816
return values
28192817

2820-
if by_item and not self._is_single_block:
2818+
if self.ndim == 2:
28212819
blocks = self.split_and_operate(None, f, False)
28222820
else:
28232821
values = f(None, self.values.ravel(), None)
@@ -3041,7 +3039,7 @@ def re_replacer(s):
30413039
# convert
30423040
block = self.make_block(new_values)
30433041
if convert:
3044-
block = block.convert(by_item=True, numeric=False)
3042+
block = block.convert(numeric=False)
30453043
return block
30463044

30473045
def _replace_coerce(
@@ -3080,9 +3078,7 @@ def _replace_coerce(
30803078
mask=mask,
30813079
)
30823080
if convert:
3083-
block = [
3084-
b.convert(by_item=True, numeric=False, copy=True) for b in block
3085-
]
3081+
block = [b.convert(numeric=False, copy=True) for b in block]
30863082
return block
30873083
return self
30883084

pandas/core/internals/managers.py

-1
Original file line number | Diff line number | Diff line change
@@ -1551,7 +1551,6 @@ def index(self):
15511551

15521552
def convert(self, **kwargs):
15531553
""" convert the whole block as one """
1554-
kwargs["by_item"] = False
15551554
return self.apply("convert", **kwargs)
15561555

15571556
@property

pandas/tests/dtypes/cast/test_convert_objects.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,8 @@
55

66

77
@pytest.mark.parametrize("data", [[1, 2], ["apply", "banana"]])
8-
@pytest.mark.parametrize("copy", [True, False])
9-
def test_maybe_convert_objects_copy(data, copy):
8+
def test_maybe_convert_objects_copy(data):
109
arr = np.array(data)
11-
out = maybe_convert_objects(arr, copy=copy)
10+
out = maybe_convert_objects(arr)
1211

13-
assert (arr is out) is (not copy)
12+
assert arr is not out

pandas/tests/internals/test_internals.py

-4
Original file line number | Diff line number | Diff line change
@@ -584,10 +584,6 @@ def _compare(old_mgr, new_mgr):
584584
new_mgr = mgr.convert()
585585
_compare(mgr, new_mgr)
586586

587-
mgr = create_mgr("a, b: object; f: i8; g: f8")
588-
new_mgr = mgr.convert()
589-
_compare(mgr, new_mgr)
590-
591587
# convert
592588
mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
593589
mgr.set("a", np.array(["1"] * N, dtype=np.object_))

0 commit comments

Comments
 (0)