Skip to content

Commit d115452

Browse files
jorisvandenbossche authored and JulianWgs committed
PERF: optimize is_numeric_v_string_like (pandas-dev#40501)
1 parent c0af000 commit d115452

File tree

3 files changed

+8
-61
lines changed

3 files changed

+8
-61
lines changed

pandas/core/dtypes/common.py

+6 -47
Original file line number | Diff line number | Diff line change
@@ -1100,15 +1100,15 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool:
11001100

11011101

11021102
# This exists to silence numpy deprecation warnings, see GH#29553
1103-
def is_numeric_v_string_like(a, b):
1103+
def is_numeric_v_string_like(a: ArrayLike, b):
11041104
"""
11051105
Check if we are comparing a string-like object to a numeric ndarray.
11061106
NumPy doesn't like to compare such objects, especially numeric arrays
11071107
and scalar string-likes.
11081108
11091109
Parameters
11101110
----------
1111-
a : array-like, scalar
1111+
a : array-like
11121112
The first object to check.
11131113
b : array-like, scalar
11141114
The second object to check.
@@ -1120,16 +1120,8 @@ def is_numeric_v_string_like(a, b):
11201120
11211121
Examples
11221122
--------
1123-
>>> is_numeric_v_string_like(1, 1)
1124-
False
1125-
>>> is_numeric_v_string_like("foo", "foo")
1126-
False
1127-
>>> is_numeric_v_string_like(1, "foo") # non-array numeric
1128-
False
11291123
>>> is_numeric_v_string_like(np.array([1]), "foo")
11301124
True
1131-
>>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check
1132-
True
11331125
>>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
11341126
True
11351127
>>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
@@ -1142,17 +1134,15 @@ def is_numeric_v_string_like(a, b):
11421134
is_a_array = isinstance(a, np.ndarray)
11431135
is_b_array = isinstance(b, np.ndarray)
11441136

1145-
is_a_numeric_array = is_a_array and is_numeric_dtype(a)
1146-
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
1147-
is_a_string_array = is_a_array and is_string_like_dtype(a)
1148-
is_b_string_array = is_b_array and is_string_like_dtype(b)
1137+
is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b")
1138+
is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b")
1139+
is_a_string_array = is_a_array and a.dtype.kind in ("S", "U")
1140+
is_b_string_array = is_b_array and b.dtype.kind in ("S", "U")
11491141

1150-
is_a_scalar_string_like = not is_a_array and isinstance(a, str)
11511142
is_b_scalar_string_like = not is_b_array and isinstance(b, str)
11521143

11531144
return (
11541145
(is_a_numeric_array and is_b_scalar_string_like)
1155-
or (is_b_numeric_array and is_a_scalar_string_like)
11561146
or (is_a_numeric_array and is_b_string_array)
11571147
or (is_b_numeric_array and is_a_string_array)
11581148
)
@@ -1305,37 +1295,6 @@ def is_numeric_dtype(arr_or_dtype) -> bool:
13051295
)
13061296

13071297

1308-
def is_string_like_dtype(arr_or_dtype) -> bool:
1309-
"""
1310-
Check whether the provided array or dtype is of a string-like dtype.
1311-
1312-
Unlike `is_string_dtype`, the object dtype is excluded because it
1313-
is a mixed dtype.
1314-
1315-
Parameters
1316-
----------
1317-
arr_or_dtype : array-like
1318-
The array or dtype to check.
1319-
1320-
Returns
1321-
-------
1322-
boolean
1323-
Whether or not the array or dtype is of the string dtype.
1324-
1325-
Examples
1326-
--------
1327-
>>> is_string_like_dtype(str)
1328-
True
1329-
>>> is_string_like_dtype(object)
1330-
False
1331-
>>> is_string_like_dtype(np.array(['a', 'b']))
1332-
True
1333-
>>> is_string_like_dtype(pd.Series([1, 2]))
1334-
False
1335-
"""
1336-
return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U"))
1337-
1338-
13391298
def is_float_dtype(arr_or_dtype) -> bool:
13401299
"""
13411300
Check whether the provided array or dtype is of a float dtype.

pandas/core/dtypes/missing.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@
3535
is_object_dtype,
3636
is_scalar,
3737
is_string_dtype,
38-
is_string_like_dtype,
3938
needs_i8_conversion,
4039
)
4140
from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -258,7 +257,7 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray:
258257
dtype = values.dtype
259258
shape = values.shape
260259

261-
if is_string_like_dtype(dtype):
260+
if dtype.kind in ("S", "U"):
262261
result = np.zeros(values.shape, dtype=bool)
263262
else:
264263
result = np.empty(shape, dtype=bool)

pandas/tests/dtypes/test_common.py

+1 -12
Original file line number | Diff line number | Diff line change
@@ -469,14 +469,11 @@ def test_is_datetime_or_timedelta_dtype():
469469

470470

471471
def test_is_numeric_v_string_like():
472-
assert not com.is_numeric_v_string_like(1, 1)
473-
assert not com.is_numeric_v_string_like(1, "foo")
474-
assert not com.is_numeric_v_string_like("foo", "foo")
472+
assert not com.is_numeric_v_string_like(np.array([1]), 1)
475473
assert not com.is_numeric_v_string_like(np.array([1]), np.array([2]))
476474
assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))
477475

478476
assert com.is_numeric_v_string_like(np.array([1]), "foo")
479-
assert com.is_numeric_v_string_like("foo", np.array([1]))
480477
assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
481478
assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
482479

@@ -521,14 +518,6 @@ def test_is_numeric_dtype():
521518
assert com.is_numeric_dtype(pd.Index([1, 2.0]))
522519

523520

524-
def test_is_string_like_dtype():
525-
assert not com.is_string_like_dtype(object)
526-
assert not com.is_string_like_dtype(pd.Series([1, 2]))
527-
528-
assert com.is_string_like_dtype(str)
529-
assert com.is_string_like_dtype(np.array(["a", "b"]))
530-
531-
532521
def test_is_float_dtype():
533522
assert not com.is_float_dtype(str)
534523
assert not com.is_float_dtype(int)

0 commit comments

Comments
 (0)