Skip to content

Commit 83c2e65

Browse files
authored
Gh 36562 typeerror comparison not supported between float and str (#37096)
1 parent d75eb5b commit 83c2e65

File tree

4 files changed

+70
-12
lines changed

4 files changed

+70
-12
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ MultiIndex
469469

470470
- Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
471471
- Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
472+
- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)
472473

473474
I/O
474475
^^^

pandas/core/algorithms.py

+32-11
Original file line numberDiff line numberDiff line change
@@ -2061,27 +2061,25 @@ def safe_sort(
20612061
dtype, _ = infer_dtype_from_array(values)
20622062
values = np.asarray(values, dtype=dtype)
20632063

2064-
def sort_mixed(values):
2065-
# order ints before strings, safe in py3
2066-
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
2067-
nums = np.sort(values[~str_pos])
2068-
strs = np.sort(values[str_pos])
2069-
return np.concatenate([nums, np.asarray(strs, dtype=object)])
2070-
20712064
sorter = None
2065+
20722066
if (
20732067
not is_extension_array_dtype(values)
20742068
and lib.infer_dtype(values, skipna=False) == "mixed-integer"
20752069
):
2076-
# unorderable in py3 if mixed str/int
2077-
ordered = sort_mixed(values)
2070+
ordered = _sort_mixed(values)
20782071
else:
20792072
try:
20802073
sorter = values.argsort()
20812074
ordered = values.take(sorter)
20822075
except TypeError:
2083-
# try this anyway
2084-
ordered = sort_mixed(values)
2076+
# Previous sorters failed or were not applicable; fall back to
2077+
# `_sort_mixed`, which orders non-strings before strings but fails for
2078+
# the special case of 1d arrays of tuples.
2079+
if values.size and isinstance(values[0], tuple):
2080+
ordered = _sort_tuples(values)
2081+
else:
2082+
ordered = _sort_mixed(values)
20852083

20862084
# codes:
20872085

@@ -2128,3 +2126,26 @@ def sort_mixed(values):
21282126
np.putmask(new_codes, mask, na_sentinel)
21292127

21302128
return ordered, ensure_platform_int(new_codes)
2129+
2130+
2131+
def _sort_mixed(values):
2132+
""" order ints before strings in 1d arrays, safe in py3 """
2133+
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
2134+
nums = np.sort(values[~str_pos])
2135+
strs = np.sort(values[str_pos])
2136+
return np.concatenate([nums, np.asarray(strs, dtype=object)])
2137+
2138+
2139+
def _sort_tuples(values: np.ndarray[tuple]):
2140+
"""
2141+
Convert array of tuples (1d) to array of arrays (2d).
2142+
We need to keep the columns separately as they contain different types and
2143+
nans (can't use `np.sort` as it may fail when str and nan are mixed in a
2144+
column as types cannot be compared).
2145+
"""
2146+
from pandas.core.internals.construction import to_arrays
2147+
from pandas.core.sorting import lexsort_indexer
2148+
2149+
arrays, _ = to_arrays(values, None)
2150+
indexer = lexsort_indexer(arrays, orders=True)
2151+
return values[indexer]

pandas/tests/frame/methods/test_combine_first.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest
55

66
import pandas as pd
7-
from pandas import DataFrame, Index, Series
7+
from pandas import DataFrame, Index, MultiIndex, Series
88
import pandas._testing as tm
99

1010

@@ -365,3 +365,32 @@ def test_combine_first_string_dtype_only_na(self):
365365
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string"
366366
).set_index(["a", "b"])
367367
tm.assert_frame_equal(result, expected)
368+
369+
370+
def test_combine_first_with_nan_multiindex():
371+
# gh-36562
372+
373+
mi1 = MultiIndex.from_arrays(
374+
[["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"]
375+
)
376+
df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1)
377+
mi2 = MultiIndex.from_arrays(
378+
[["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"]
379+
)
380+
s = Series([1, 2, 3, 4, 5, 6], index=mi2)
381+
res = df.combine_first(DataFrame({"d": s}))
382+
mi_expected = MultiIndex.from_arrays(
383+
[
384+
["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan],
385+
[1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6],
386+
],
387+
names=["a", "b"],
388+
)
389+
expected = DataFrame(
390+
{
391+
"c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
392+
"d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
393+
},
394+
index=mi_expected,
395+
)
396+
tm.assert_frame_equal(res, expected)

pandas/tests/test_sorting.py

+7
Original file line numberDiff line numberDiff line change
@@ -453,3 +453,10 @@ def test_extension_array_codes(self, verify, na_sentinel):
453453
expected_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp)
454454
tm.assert_extension_array_equal(result, expected_values)
455455
tm.assert_numpy_array_equal(codes, expected_codes)
456+
457+
458+
def test_mixed_str_nan():
459+
values = np.array(["b", np.nan, "a", "b"], dtype=object)
460+
result = safe_sort(values)
461+
expected = np.array([np.nan, "a", "b", "b"], dtype=object)
462+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)