Skip to content

BUG: Fix MultiIndex from_tuples on tuples with NaNs #60944

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into pandas-dev:main from Anurag-Varma:bug#60695
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
a22f9b9
fixed bug 60695
Anurag-Varma Feb 16, 2025
7cacf2e
added test cases
Anurag-Varma Feb 16, 2025
d705f5a
documenting changes in rst
Anurag-Varma Feb 16, 2025
18ef527
fixed failing test case
Anurag-Varma Feb 16, 2025
82b0457
Merge branch 'main' into bug#60695
Anurag-Varma Feb 21, 2025
9dbf262
fixed requested changes
Anurag-Varma Feb 24, 2025
c7c0b03
Merge branch 'main' of https://github.com/pandas-dev/pandas into bug#60695
Anurag-Varma Feb 24, 2025
f0e3942
Merge branch 'bug#60695' of https://github.com/Anurag-Varma/pandas into bug#60695
Anurag-Varma Feb 24, 2025
8a0266f
fixed test case issue
Anurag-Varma Feb 24, 2025
55c7fc8
Merge branch 'main' into bug#60695
Anurag-Varma Feb 24, 2025
d3c5263
Merge branch 'main' into bug#60695
Anurag-Varma Feb 25, 2025
1cf61a6
fixed test case input alignment
Anurag-Varma Feb 25, 2025
2a1f226
Merge branch 'main' of https://github.com/pandas-dev/pandas into bug#60695
Anurag-Varma Feb 25, 2025
77436f1
Merge branch 'bug#60695' of https://github.com/Anurag-Varma/pandas into bug#60695
Anurag-Varma Feb 25, 2025
bb06cc4
fixed test-case reviewed changes
Anurag-Varma Mar 4, 2025
95fd837
Merge branch 'main' of https://github.com/pandas-dev/pandas into bug#60695
Anurag-Varma Mar 4, 2025
e868ca1
modified test case parameters
Anurag-Varma Mar 4, 2025
23bb619
fixed if else for single parameter
Anurag-Varma Mar 6, 2025
9c6f609
Merge branch 'main' of https://github.com/pandas-dev/pandas into bug#60695
Anurag-Varma Mar 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ MultiIndex
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
-
- Bug in :meth:`MultiIndex.from_tuples` producing incorrect output when the input tuples contain NaN values (:issue:`60695`, :issue:`60988`)

I/O
^^^
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,8 @@ def map_array(
If the function returns a tuple with more than one element
a MultiIndex will be returned.
"""
from pandas import Index

if na_action not in (None, "ignore"):
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
raise ValueError(msg)
Expand Down Expand Up @@ -1676,6 +1678,10 @@ def map_array(

if len(mapper) == 0:
mapper = Series(mapper, dtype=np.float64)
elif isinstance(mapper, dict):
mapper = Series(
mapper.values(), index=Index(mapper.keys(), tupleize_cols=False)
)
else:
mapper = Series(mapper)

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Sequence,
)
from functools import wraps
from itertools import zip_longest
from sys import getsizeof
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -588,7 +589,7 @@ def from_tuples(
elif isinstance(tuples, list):
arrays = list(lib.to_object_array_tuples(tuples).T)
else:
arrs = zip(*tuples)
arrs = zip_longest(*tuples, fillvalue=np.nan)
arrays = cast(list[Sequence[Hashable]], arrs)

return cls.from_arrays(arrays, sortorder=sortorder, names=names)
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,19 @@ def test_from_tuples_with_tuple_label():
tm.assert_frame_equal(expected, result)


# Regression test for GH 60695: MultiIndex.from_tuples is called with tuples
# of unequal length, and the level missing from the shorter tuple is expected
# to come back as NaN.
@pytest.mark.parametrize(
"keys, expected",
[
# Shorter tuple comes first.
((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))),
# Shorter tuple comes last.
((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))),
],
)
def test_from_tuples_with_various_tuple_lengths(keys, expected):
# GH 60695
# NOTE(review): the inputs are tuples of tuples rather than lists, so this
# presumably exercises the non-list branch of from_tuples -- confirm.
idx = MultiIndex.from_tuples(keys)
# Compare the index entries as plain Python tuples.
# NOTE(review): NaN != NaN, so this equality appears to rely on the NaN
# entries being the same object as np.nan -- confirm this is intended.
assert tuple(idx) == expected


# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
Expand Down
52 changes: 33 additions & 19 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,10 +1441,17 @@ def test_constructor_tuple_of_tuples(self):
s = Series(data)
assert tuple(s) == data

def test_constructor_dict_of_tuples(self):
data = {(1, 2): 3, (None, 5): 6}
@pytest.mark.parametrize(
"data, expected_values, expected_index",
[
({(1, 2): 3, (None, 5): 6}, [3, 6], [(1, 2), (None, 5)]),
({(1,): 3, (4, 5): 6}, [3, 6], [(1, None), (4, 5)]),
],
)
def test_constructor_dict_of_tuples(self, data, expected_values, expected_index):
# GH 60695
result = Series(data).sort_values()
expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)]))
expected = Series(expected_values, index=MultiIndex.from_tuples(expected_index))
tm.assert_series_equal(result, expected)

# https://github.com/pandas-dev/pandas/issues/22698
Expand Down Expand Up @@ -1860,23 +1867,30 @@ class A(OrderedDict):
series = Series(A(data))
tm.assert_series_equal(series, expected)

def test_constructor_dict_multiindex(self):
d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
_d = sorted(d.items())
result = Series(d)
expected = Series(
[x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data, expected_index_multi",
[
({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True),
({("a",): 0.0, ("a", "b"): 1.0}, True),
({"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, False),
],
)
def test_constructor_dict_multiindex(self, data, expected_index_multi):
# GH#60695
result = Series(data)

d["z"] = 111.0
_d.insert(0, ("z", d["z"]))
result = Series(d)
expected = Series(
[x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False)
)
result = result.reindex(index=expected.index)
tm.assert_series_equal(result, expected)
if expected_index_multi:
expected = Series(
list(data.values()),
index=MultiIndex.from_tuples(list(data.keys())),
)
tm.assert_series_equal(result, expected)
else:
expected = Series(
list(data.values()),
index=Index(list(data.keys())),
)
tm.assert_series_equal(result, expected)

def test_constructor_dict_multiindex_reindex_flat(self):
# construction involves reindexing with a MultiIndex corner case
Expand Down
Loading