From a22f9b9b72ac100b96490560fdb988d168ebc161 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 16 Feb 2025 12:17:41 -0500 Subject: [PATCH 01/10] fixed bug 60695 --- pandas/core/indexes/multi.py | 3 ++- .../tests/indexes/multi/test_constructors.py | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dc48cd1ed958e..64c0310641f15 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -9,6 +9,7 @@ Sequence, ) from functools import wraps +from itertools import zip_longest from sys import getsizeof from typing import ( TYPE_CHECKING, @@ -591,7 +592,7 @@ def from_tuples( elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrs = zip(*tuples) + arrs = zip_longest(*tuples, fillvalue=np.nan) arrays = cast(list[Sequence[Hashable]], arrs) return cls.from_arrays(arrays, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index b2867d4ac8e68..db1e03126572f 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -410,6 +410,28 @@ def test_from_tuples_with_tuple_label(): tm.assert_frame_equal(expected, result) +@pytest.mark.parametrize( + "keys, expected", + ( + ((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))), + ( + ( + ( + "l1", + "l2", + ), + ("l1",), + ), + (("l1", "l2"), ("l1", np.nan)), + ), + ), +) +def test_from_tuples_with_various_tuple_lengths(keys, expected): + # GH 60695 + idx = MultiIndex.from_tuples(keys) + assert tuple(idx) == expected + + # ---------------------------------------------------------------------------- # from_product # ---------------------------------------------------------------------------- From 7cacf2e064e66b939b6776cb8a11c04434386888 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 16 Feb 2025 12:57:22 -0500 Subject: [PATCH 02/10] added test cases --- pandas/tests/series/test_constructors.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a2be698c0ec28..2437cc15f72da 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1447,6 +1447,12 @@ def test_constructor_dict_of_tuples(self): expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) tm.assert_series_equal(result, expected) + # GH 60695 + data = {(1,): 3, (4, 5): 6} + result = Series(data).sort_values() + expected = Series([3, 6], index=MultiIndex.from_tuples([(1, None), (4, 5)])) + tm.assert_series_equal(result, expected) + # https://github.com/pandas-dev/pandas/issues/22698 @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") def test_fromDict(self, using_infer_string): @@ -1878,6 +1884,15 @@ def test_constructor_dict_multiindex(self): result = result.reindex(index=expected.index) tm.assert_series_equal(result, expected) + # GH 60695 + d = {("a",): 0.0, ("a", "b"): 1.0} + _d = sorted(d.items()) + result = Series(d) + expected = Series( + [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) + ) + tm.assert_series_equal(result, expected) + def test_constructor_dict_multiindex_reindex_flat(self): # construction involves reindexing with a MultiIndex corner case data = {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2, "j": np.nan} From d705f5aa64f45dce3b3679b25d95e758d033e09f Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 16 Feb 2025 13:14:34 -0500 Subject: [PATCH 03/10] documenting changes in rst --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4d9a45abe17cd..65cf198e44340 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -699,7 +699,7 @@ MultiIndex - :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`) - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) -- +- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`) I/O ^^^ From 18ef5279c37a2179bf07c4afc607b43d469ee487 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 16 Feb 2025 15:00:45 -0500 Subject: [PATCH 04/10] fixed failing test case --- pandas/tests/series/methods/test_map.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 84b60a2afe6eb..e210806b7aea2 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -288,7 +288,9 @@ def test_map_dict_with_tuple_keys(): df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} - df["labels"] = df["a"].map(label_mappings) + # GH 60695 + df["labels"] = df["a"].apply(lambda x: label_mappings.get(x, None)) + df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index) # All labels should be filled now tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) From 9dbf262ef30476ca0b0fb96f6ab651fd9a64989e Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 23 Feb 2025 19:52:35 -0500 Subject: [PATCH 05/10] fixed requested changes --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/algorithms.py | 6 ++ .../tests/indexes/multi/test_constructors.py | 4 +- pandas/tests/series/methods/test_map.py | 4 +- pandas/tests/series/test_constructors.py | 75 +++++++++++-------- 5 files changed, 52 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65cf198e44340..a7b2d3e72fcdd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -699,7 +699,7 @@ MultiIndex - :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`) - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) -- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`) +- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) I/O ^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index aafd802b827a5..0c0232bdc6d4c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1647,6 +1647,8 @@ def map_array( If the function returns a tuple with more than one element a MultiIndex will be returned. """ + from pandas import Index + if na_action not in (None, "ignore"): msg = f"na_action must either be 'ignore' or None, {na_action} was passed" raise ValueError(msg) @@ -1676,6 +1678,10 @@ def map_array( if len(mapper) == 0: mapper = Series(mapper, dtype=np.float64) + elif isinstance(mapper, dict): + mapper = Series( + mapper.values(), index=Index(mapper.keys(), tupleize_cols=False) + ) else: mapper = Series(mapper) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index db1e03126572f..5e1bcfcbb0c5a 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -412,7 +412,7 @@ def test_from_tuples_with_tuple_label(): @pytest.mark.parametrize( "keys, expected", - ( + [ ((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))), ( ( @@ -424,7 +424,7 @@ def test_from_tuples_with_tuple_label(): ), (("l1", "l2"), ("l1", np.nan)), ), - ), + ], ) def test_from_tuples_with_various_tuple_lengths(keys, expected): # GH 60695 diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index e210806b7aea2..84b60a2afe6eb 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -288,9 +288,7 @@ def test_map_dict_with_tuple_keys(): df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} - # GH 60695 - df["labels"] = df["a"].apply(lambda x: label_mappings.get(x, None)) - + df["labels"] = df["a"].map(label_mappings) df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index) # All labels should be filled now tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2437cc15f72da..dd9bc20fdefad 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1441,16 +1441,17 @@ def test_constructor_tuple_of_tuples(self): s = Series(data) assert tuple(s) == data - def test_constructor_dict_of_tuples(self): - data = {(1, 2): 3, (None, 5): 6} - result = Series(data).sort_values() - expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) - tm.assert_series_equal(result, expected) - - # GH 60695 - data = {(1,): 3, (4, 5): 6} + @pytest.mark.parametrize( + "data, expected_values, expected_index", + [ + ({(1, 2): 3, (None, 5): 6}, [3, 6], [(1, 2), (None, 5)]), + # GH 60695 test case + ({(1,): 3, (4, 5): 6}, [3, 6], [(1, None), (4, 5)]), + ], + ) + def test_constructor_dict_of_tuples(self, data, expected_values, expected_index): result = Series(data).sort_values() - expected = Series([3, 6], index=MultiIndex.from_tuples([(1, None), (4, 5)])) + expected = Series(expected_values, index=MultiIndex.from_tuples(expected_index)) tm.assert_series_equal(result, expected) # https://github.com/pandas-dev/pandas/issues/22698 @@ -1866,32 +1867,40 @@ class A(OrderedDict): series = Series(A(data)) tm.assert_series_equal(series, expected) - def test_constructor_dict_multiindex(self): - d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} - _d = sorted(d.items()) - result = Series(d) - expected = Series( - [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) - ) - tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data, expected_index_multi, expected_index_single", + [ + ( + {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, + MultiIndex.from_tuples([("a", "a"), ("b", "a"), ("b", "c")]), + None, + ), + ( + {("a",): 0.0, ("a", "b"): 1.0}, + MultiIndex.from_tuples([("a",), ("a", "b")]), + None, + ), + ( + {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0, "z": 111.0}, + None, + Index(["z", ("a", "a"), ("b", "a"), ("b", "c")], tupleize_cols=False), + ), + ], + ) + def test_constructor_dict_multiindex( + data, expected_index_multi, expected_index_single + ): + _d = sorted(data.items()) - d["z"] = 111.0 - _d.insert(0, ("z", d["z"])) - result = Series(d) - expected = Series( - [x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False) - ) - result = result.reindex(index=expected.index) - tm.assert_series_equal(result, expected) + result = Series(data) + if expected_index_multi: + expected = Series([x[1] for x in _d], index=expected_index_multi) + tm.assert_series_equal(result, expected) - # GH 60695 - d = {("a",): 0.0, ("a", "b"): 1.0} - _d = sorted(d.items()) - result = Series(d) - expected = Series( - [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) - ) - tm.assert_series_equal(result, expected) + if expected_index_single: + result = result.reindex(index=expected_index_single) + expected = Series([x[1] for x in _d], index=expected_index_single) + tm.assert_series_equal(result, expected) def test_constructor_dict_multiindex_reindex_flat(self): # construction involves reindexing with a MultiIndex corner case From 8a0266f62c17e7aa9daaf386ef968cbec50850eb Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 23 Feb 2025 20:26:51 -0500 Subject: [PATCH 06/10] fixed test case issue --- pandas/tests/series/test_constructors.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index dd9bc20fdefad..0e10b0cb92578 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1883,23 +1883,31 @@ class A(OrderedDict): ( {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0, "z": 111.0}, None, - Index(["z", ("a", "a"), ("b", "a"), ("b", "c")], tupleize_cols=False), + Index(["z", ("a", "a"), ("b", "a"), ("b", "c")], dtype=object), ), ], ) def test_constructor_dict_multiindex( - data, expected_index_multi, expected_index_single + self, data, expected_index_multi, expected_index_single ): - _d = sorted(data.items()) + if all(isinstance(k, tuple) for k in data.keys()): + sorted_data = sorted(data.items()) + else: + sorted_data = list(data.items()) result = Series(data) - if expected_index_multi: - expected = Series([x[1] for x in _d], index=expected_index_multi) + + if expected_index_multi is not None: + expected = Series([x[1] for x in sorted_data], index=expected_index_multi) tm.assert_series_equal(result, expected) - if expected_index_single: - result = result.reindex(index=expected_index_single) - expected = Series([x[1] for x in _d], index=expected_index_single) + if expected_index_single is not None: + result = result.reindex(index=expected_index_single, fill_value=np.nan) + expected_values = [ + data[idx] if idx in data else np.nan for idx in expected_index_single + ] + expected = Series(expected_values, index=expected_index_single) + tm.assert_series_equal(result, expected) def test_constructor_dict_multiindex_reindex_flat(self): From 1cf61a63ec882bbd24dc840f02a64adc42af5616 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 25 Feb 2025 13:35:22 -0500 Subject: [PATCH 07/10] fixed test case input alignment --- pandas/tests/indexes/multi/test_constructors.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 5e1bcfcbb0c5a..92827cf154394 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -414,16 +414,7 @@ def test_from_tuples_with_tuple_label(): "keys, expected", [ ((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))), - ( - ( - ( - "l1", - "l2", - ), - ("l1",), - ), - (("l1", "l2"), ("l1", np.nan)), - ), + ((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))), ], ) def test_from_tuples_with_various_tuple_lengths(keys, expected): From bb06cc4758d134b9296c24c94ceb5c06e581ba8e Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Mon, 3 Mar 2025 22:59:17 -0500 Subject: [PATCH 08/10] fixed test-case reviewed changes --- pandas/tests/series/test_constructors.py | 39 +++++++++--------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 0e10b0cb92578..cc08542a13a27 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1445,11 +1445,11 @@ def test_constructor_tuple_of_tuples(self): "data, expected_values, expected_index", [ ({(1, 2): 3, (None, 5): 6}, [3, 6], [(1, 2), (None, 5)]), - # GH 60695 test case ({(1,): 3, (4, 5): 6}, [3, 6], [(1, None), (4, 5)]), ], ) def test_constructor_dict_of_tuples(self, data, expected_values, expected_index): + # GH 60695 result = Series(data).sort_values() expected = Series(expected_values, index=MultiIndex.from_tuples(expected_index)) tm.assert_series_equal(result, expected) @@ -1870,44 +1870,33 @@ class A(OrderedDict): @pytest.mark.parametrize( "data, expected_index_multi, expected_index_single", [ + ({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True, None), + ({("a",): 0.0, ("a", "b"): 1.0}, True, None), ( - {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, - MultiIndex.from_tuples([("a", "a"), ("b", "a"), ("b", "c")]), + {"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, None, - ), - ( - {("a",): 0.0, ("a", "b"): 1.0}, - MultiIndex.from_tuples([("a",), ("a", "b")]), - None, - ), - ( - {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0, "z": 111.0}, - None, - Index(["z", ("a", "a"), ("b", "a"), ("b", "c")], dtype=object), + True, ), ], ) def test_constructor_dict_multiindex( self, data, expected_index_multi, expected_index_single ): - if all(isinstance(k, tuple) for k in data.keys()): - sorted_data = sorted(data.items()) - else: - sorted_data = list(data.items()) - + # GH#60695 result = Series(data) if expected_index_multi is not None: - expected = Series([x[1] for x in sorted_data], index=expected_index_multi) + expected = Series( + list(data.values()), + index=MultiIndex.from_tuples(list(data.keys())), + ) tm.assert_series_equal(result, expected) if expected_index_single is not None: - result = result.reindex(index=expected_index_single, fill_value=np.nan) - expected_values = [ - data[idx] if idx in data else np.nan for idx in expected_index_single - ] - expected = Series(expected_values, index=expected_index_single) - + expected = Series( + list(data.values()), + index=Index(list(data.keys())), + ) tm.assert_series_equal(result, expected) def test_constructor_dict_multiindex_reindex_flat(self): From e868ca111612c0c3d98d3dc9781d3265b590eb65 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Mon, 3 Mar 2025 23:15:19 -0500 Subject: [PATCH 09/10] modified test case parameters --- pandas/tests/series/test_constructors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index cc08542a13a27..c320e65e0ba72 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1870,11 +1870,11 @@ class A(OrderedDict): @pytest.mark.parametrize( "data, expected_index_multi, expected_index_single", [ - ({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True, None), - ({("a",): 0.0, ("a", "b"): 1.0}, True, None), + ({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True, False), + ({("a",): 0.0, ("a", "b"): 1.0}, True, False), ( {"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, - None, + False, True, ), ], @@ -1885,14 +1885,14 @@ def test_constructor_dict_multiindex( # GH#60695 result = Series(data) - if expected_index_multi is not None: + if expected_index_multi: expected = Series( list(data.values()), index=MultiIndex.from_tuples(list(data.keys())), ) tm.assert_series_equal(result, expected) - if expected_index_single is not None: + if expected_index_single: expected = Series( list(data.values()), index=Index(list(data.keys())), From 23bb619b720759b7d54d0f1ef4ceaff0a40cc252 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Thu, 6 Mar 2025 17:03:01 -0500 Subject: [PATCH 10/10] fixed if else for single parameter --- pandas/tests/series/test_constructors.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c320e65e0ba72..5f4a100e7ccc7 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1868,20 +1868,14 @@ class A(OrderedDict): tm.assert_series_equal(series, expected) @pytest.mark.parametrize( - "data, expected_index_multi, expected_index_single", + "data, expected_index_multi", [ - ({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True, False), - ({("a",): 0.0, ("a", "b"): 1.0}, True, False), - ( - {"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, - False, - True, - ), + ({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True), + ({("a",): 0.0, ("a", "b"): 1.0}, True), + ({"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, False), ], ) - def test_constructor_dict_multiindex( - self, data, expected_index_multi, expected_index_single - ): + def test_constructor_dict_multiindex(self, data, expected_index_multi): # GH#60695 result = Series(data) @@ -1891,8 +1885,7 @@ def test_constructor_dict_multiindex( index=MultiIndex.from_tuples(list(data.keys())), ) tm.assert_series_equal(result, expected) - - if expected_index_single: + else: expected = Series( list(data.values()), index=Index(list(data.keys())),