From b6c529798c337099231e7400e32fa9d341a3246b Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 17:08:40 +0530 Subject: [PATCH 01/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dc48cd1ed958e..0f46998024661 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -540,23 +540,6 @@ def from_tuples( Returns ------- MultiIndex - - See Also - -------- - MultiIndex.from_arrays : Convert list of arrays to MultiIndex. - MultiIndex.from_product : Make a MultiIndex from cartesian product - of iterables. - MultiIndex.from_frame : Make a MultiIndex from a DataFrame. - - Examples - -------- - >>> tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] - >>> pd.MultiIndex.from_tuples(tuples, names=("number", "color")) - MultiIndex([(1, 'red'), - (1, 'blue'), - (2, 'red'), - (2, 'blue')], - names=['number', 'color']) """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") @@ -591,7 +574,9 @@ def from_tuples( elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrs = zip(*tuples) + # Use zip_longest instead of zip to handle tuples of different lengths + from itertools import zip_longest + arrs = zip_longest(*tuples, fillvalue=np.nan) arrays = cast(list[Sequence[Hashable]], arrs) return cls.from_arrays(arrays, sortorder=sortorder, names=names) From dacff95070154cb9512f513e8f9cee3467a75694 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 17:17:42 +0530 Subject: [PATCH 02/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- .../tests/indexes/multi/test_constructors.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index b2867d4ac8e68..481e03ed4a648 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -857,3 +857,55 @@ def test_dtype_representation(using_infer_string): dtype=object, ) tm.assert_series_equal(result, expected) + + +def test_from_tuples_different_lengths_gh60695(): + """ + Test that MultiIndex.from_tuples properly handles tuples of different lengths. + + GH#60695 + """ + # Test case 1: Original issue example + tuples = [("l1",), ("l1", "l2")] + result = pd.MultiIndex.from_tuples(tuples) + expected = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + tm.assert_index_equal(result, expected) + + # Test case 2: Series construction with tuple keys + s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"}) + expected = pd.Series( + ["v1", "v2"], + index=pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + ) + tm.assert_series_equal(s, expected) + + # Test case 3: Multiple levels with None + data = {(1, 1, None): -1.0} + result = pd.Series(data) + expected = pd.Series( + -1.0, + index=pd.MultiIndex.from_tuples([(1, 1, np.nan)]), + ) + tm.assert_series_equal(result, expected) + + # Test case 4: Mixed length tuples + tuples = [("a",), ("b", "c"), ("d", "e", "f")] + result = pd.MultiIndex.from_tuples(tuples) + expected = pd.MultiIndex.from_tuples([ + ("a", np.nan, np.nan), + ("b", "c", np.nan), + ("d", "e", "f") + ]) + tm.assert_index_equal(result, expected) + + # Test case 5: DataFrame with tuple index + df = pd.DataFrame( + {"col": ["v1", "v2"]}, + index=pd.MultiIndex.from_tuples([("l1",), ("l1", "l2")]) + ) + expected_index = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + expected_df = pd.DataFrame( + {"col": ["v1", "v2"]}, + index=expected_index + ) + tm.assert_frame_equal(df, expected_df) From 46dfc5b2313f9171e686cda341c3a5c4e7f789ee Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 19:49:29 +0530 Subject: [PATCH 03/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 50 +++++------------- .../tests/indexes/multi/test_constructors.py | 52 +++++++++---------- 2 files changed, 38 insertions(+), 64 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0f46998024661..69d6c9e523a5e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -526,58 +526,36 @@ def from_tuples( ) -> MultiIndex: """ Convert list of tuples to MultiIndex. - - Parameters - ---------- - tuples : list / sequence of tuple-likes - Each tuple is the index of one row/column. - sortorder : int or None - Level of sortedness (must be lexicographically sorted by that - level). - names : list / sequence of str, optional - Names for the levels in the index. - - Returns - ------- - MultiIndex """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") + if is_iterator(tuples): tuples = list(tuples) + + # Cast to proper tuple type tuples = cast(Collection[tuple[Hashable, ...]], tuples) - - # handling the empty tuple cases - if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): - codes = [np.zeros(len(tuples))] - levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] - return cls( - levels=levels, - codes=codes, - sortorder=sortorder, - names=names, - verify_integrity=False, - ) - - arrays: list[Sequence[Hashable]] + if len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") - # error: Argument 1 to "len" has incompatible type "Hashable"; - # expected "Sized" - arrays = [[]] * len(names) # type: ignore[arg-type] + arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = np.asarray(tuples._values) - arrays = list(lib.tuples_to_object_array(tuples).T) elif isinstance(tuples, list): - arrays = list(lib.to_object_array_tuples(tuples).T) + # Find the maximum length of tuples + max_length = max((len(t) for t in tuples), default=0) + + # Pad shorter tuples with np.nan + padded_tuples = [ + t + (np.nan,) * (max_length - len(t)) for t in tuples + ] + arrays = list(lib.to_object_array_tuples(padded_tuples).T) else: - # Use zip_longest instead of zip to handle tuples of different lengths from itertools import zip_longest - arrs = zip_longest(*tuples, fillvalue=np.nan) - arrays = cast(list[Sequence[Hashable]], arrs) + arrays = list(zip_longest(*tuples, fillvalue=np.nan)) return cls.from_arrays(arrays, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 481e03ed4a648..7aebe68318aae 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -865,11 +865,11 @@ def test_from_tuples_different_lengths_gh60695(): GH#60695 """ - # Test case 1: Original issue example + # Test case 1: Original issue example with string values tuples = [("l1",), ("l1", "l2")] result = pd.MultiIndex.from_tuples(tuples) expected = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) # Test case 2: Series construction with tuple keys s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"}) @@ -877,35 +877,31 @@ def test_from_tuples_different_lengths_gh60695(): ["v1", "v2"], index=pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) ) - tm.assert_series_equal(s, expected) + tm.assert_series_equal(s, expected, check_index_type=True) - # Test case 3: Multiple levels with None - data = {(1, 1, None): -1.0} - result = pd.Series(data) - expected = pd.Series( - -1.0, - index=pd.MultiIndex.from_tuples([(1, 1, np.nan)]), - ) - tm.assert_series_equal(result, expected) + # Test case 3: Handle numeric values + tuples = [(1,), (1, 2)] + result = pd.MultiIndex.from_tuples(tuples) + expected = pd.MultiIndex.from_tuples([(1, np.nan), (1, 2)]) + tm.assert_index_equal(result, expected, exact=True) - # Test case 4: Mixed length tuples - tuples = [("a",), ("b", "c"), ("d", "e", "f")] + # Test case 4: Mixed types (strings and integers) + tuples = [(1, "a"), (1,), (2, "b", "c")] result = pd.MultiIndex.from_tuples(tuples) expected = pd.MultiIndex.from_tuples([ - ("a", np.nan, np.nan), - ("b", "c", np.nan), - ("d", "e", "f") + (1, "a", np.nan), + (1, np.nan, np.nan), + (2, "b", "c") ]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) - # Test case 5: DataFrame with tuple index - df = pd.DataFrame( - {"col": ["v1", "v2"]}, - index=pd.MultiIndex.from_tuples([("l1",), ("l1", "l2")]) - ) - expected_index = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) - expected_df = pd.DataFrame( - {"col": ["v1", "v2"]}, - index=expected_index - ) - tm.assert_frame_equal(df, expected_df) + # Test case 5: Empty tuples + tuples = [] + with pytest.raises(TypeError, match="Cannot infer number of levels"): + pd.MultiIndex.from_tuples(tuples) + + # Test case 6: Single level consistency + tuples = [("a",)] + result = pd.MultiIndex.from_tuples(tuples) + expected = pd.MultiIndex.from_tuples([("a",)]) + tm.assert_index_equal(result, expected, exact=True) From 0f77b11270342813a13a216bdaa0e74cfbc1083f Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 20:29:05 +0530 Subject: [PATCH 04/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 53 ++++++++++------ .../tests/indexes/multi/test_constructors.py | 62 ++++++++++++------- 2 files changed, 74 insertions(+), 41 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 69d6c9e523a5e..7be9697b0e21c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -7,6 +7,7 @@ Hashable, Iterable, Sequence, + List, ) from functools import wraps from sys import getsizeof @@ -15,6 +16,7 @@ Any, Literal, cast, + ArrayLike, ) import warnings @@ -526,37 +528,52 @@ def from_tuples( ) -> MultiIndex: """ Convert list of tuples to MultiIndex. + + Parameters + ---------- + tuples : list / sequence of tuple-likes + Each tuple is the index of one row/column. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") - + if is_iterator(tuples): tuples = list(tuples) - - # Cast to proper tuple type + tuples = cast(Collection[tuple[Hashable, ...]], tuples) - + if len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") - arrays = [[]] * len(names) - elif isinstance(tuples, (np.ndarray, Index)): + names_seq = cast(Sequence[Hashable], names) + arrays: List[ArrayLike] = [[]] * len(names_seq) + return cls.from_arrays(arrays, sortorder=sortorder, names=names) + + if isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = np.asarray(tuples._values) arrays = list(lib.tuples_to_object_array(tuples).T) - elif isinstance(tuples, list): - # Find the maximum length of tuples - max_length = max((len(t) for t in tuples), default=0) - - # Pad shorter tuples with np.nan - padded_tuples = [ - t + (np.nan,) * (max_length - len(t)) for t in tuples - ] - arrays = list(lib.to_object_array_tuples(padded_tuples).T) - else: - from itertools import zip_longest - arrays = list(zip_longest(*tuples, fillvalue=np.nan)) + return cls.from_arrays(arrays, sortorder=sortorder, names=names) + + # Convert to list and process + tuples_list = list(tuples) + max_length = max(len(t) if isinstance(t, tuple) else 1 for t in tuples_list) + + result_tuples = [] + for t in tuples_list: + if not isinstance(t, tuple): + t = (t,) + result_tuples.append(t + (np.nan,) * (max_length - len(t))) + arrays = list(lib.to_object_array_tuples(result_tuples).T) return cls.from_arrays(arrays, sortorder=sortorder, names=names) @classmethod diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 7aebe68318aae..17b630c3e8d92 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -865,43 +865,59 @@ def test_from_tuples_different_lengths_gh60695(): GH#60695 """ - # Test case 1: Original issue example with string values + # Test case 1: Basic string tuples tuples = [("l1",), ("l1", "l2")] - result = pd.MultiIndex.from_tuples(tuples) - expected = pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) - tm.assert_index_equal(result, expected, exact=True) + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + tm.assert_index_equal(result, expected) - # Test case 2: Series construction with tuple keys + # Test case 2: Series with tuple keys s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"}) expected = pd.Series( ["v1", "v2"], - index=pd.MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + index=MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) ) - tm.assert_series_equal(s, expected, check_index_type=True) + tm.assert_series_equal(s, expected) - # Test case 3: Handle numeric values + # Test case 3: Numeric tuples tuples = [(1,), (1, 2)] - result = pd.MultiIndex.from_tuples(tuples) - expected = pd.MultiIndex.from_tuples([(1, np.nan), (1, 2)]) - tm.assert_index_equal(result, expected, exact=True) + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)]) + tm.assert_index_equal(result, expected) - # Test case 4: Mixed types (strings and integers) + # Test case 4: Mixed types tuples = [(1, "a"), (1,), (2, "b", "c")] - result = pd.MultiIndex.from_tuples(tuples) - expected = pd.MultiIndex.from_tuples([ + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([ (1, "a", np.nan), (1, np.nan, np.nan), (2, "b", "c") ]) - tm.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected) - # Test case 5: Empty tuples - tuples = [] + # Test case 5: Empty input with names + empty_idx = MultiIndex.from_tuples([], names=["a", "b"]) + assert empty_idx.names == ["a", "b"] + assert len(empty_idx) == 0 + + # Test case 6: Empty input without names with pytest.raises(TypeError, match="Cannot infer number of levels"): - pd.MultiIndex.from_tuples(tuples) + MultiIndex.from_tuples([]) - # Test case 6: Single level consistency - tuples = [("a",)] - result = pd.MultiIndex.from_tuples(tuples) - expected = pd.MultiIndex.from_tuples([("a",)]) - tm.assert_index_equal(result, expected, exact=True) + # Test case 7: None values + tuples = [(1, None), (1, 2)] + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)]) + tm.assert_index_equal(result, expected) + + # Test case 8: DataFrame with tuple index + df = pd.DataFrame( + {"col": ["v1", "v2"]}, + index=MultiIndex.from_tuples([("l1",), ("l1", "l2")]) + ) + expected_index = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) + expected_df = pd.DataFrame( + {"col": ["v1", "v2"]}, + index=expected_index + ) + tm.assert_frame_equal(df, expected_df) From c71e8981c3127740a67dce97545f9b3032919051 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 20:38:34 +0530 Subject: [PATCH 05/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 17 ++++--- .../tests/indexes/multi/test_constructors.py | 51 +++++++------------ 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7be9697b0e21c..4e826f9d8d576 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -545,25 +545,28 @@ def from_tuples( if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") - if is_iterator(tuples): - tuples = list(tuples) - - tuples = cast(Collection[tuple[Hashable, ...]], tuples) - - if len(tuples) == 0: + # Handle empty tuples case first + if isinstance(tuples, (list, tuple)) and len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") names_seq = cast(Sequence[Hashable], names) arrays: List[ArrayLike] = [[]] * len(names_seq) return cls.from_arrays(arrays, sortorder=sortorder, names=names) + # Convert iterator to list + if is_iterator(tuples): + tuples = list(tuples) + + tuples = cast(Collection[tuple[Hashable, ...]], tuples) + + # Handle numpy array or Index if isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = np.asarray(tuples._values) arrays = list(lib.tuples_to_object_array(tuples).T) return cls.from_arrays(arrays, sortorder=sortorder, names=names) - # Convert to list and process + # Convert to list and normalize tuples_list = list(tuples) max_length = max(len(t) if isinstance(t, tuple) else 1 for t in tuples_list) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 17b630c3e8d92..b8d38325e4bef 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -862,14 +862,14 @@ def test_dtype_representation(using_infer_string): def test_from_tuples_different_lengths_gh60695(): """ Test that MultiIndex.from_tuples properly handles tuples of different lengths. - + GH#60695 """ # Test case 1: Basic string tuples tuples = [("l1",), ("l1", "l2")] result = MultiIndex.from_tuples(tuples) expected = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) # Test case 2: Series with tuple keys s = pd.Series({("l1",): "v1", ("l1", "l2"): "v2"}) @@ -877,47 +877,30 @@ def test_from_tuples_different_lengths_gh60695(): ["v1", "v2"], index=MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) ) - tm.assert_series_equal(s, expected) - - # Test case 3: Numeric tuples - tuples = [(1,), (1, 2)] - result = MultiIndex.from_tuples(tuples) - expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)]) - tm.assert_index_equal(result, expected) - - # Test case 4: Mixed types - tuples = [(1, "a"), (1,), (2, "b", "c")] - result = MultiIndex.from_tuples(tuples) - expected = MultiIndex.from_tuples([ - (1, "a", np.nan), - (1, np.nan, np.nan), - (2, "b", "c") - ]) - tm.assert_index_equal(result, expected) + tm.assert_series_equal(s, expected, check_index_type=True) - # Test case 5: Empty input with names + # Test case 3: Empty input with names empty_idx = MultiIndex.from_tuples([], names=["a", "b"]) + assert isinstance(empty_idx, MultiIndex) assert empty_idx.names == ["a", "b"] assert len(empty_idx) == 0 - # Test case 6: Empty input without names + # Test case 4: Empty input without names with pytest.raises(TypeError, match="Cannot infer number of levels"): MultiIndex.from_tuples([]) - # Test case 7: None values + # Test case 5: None values tuples = [(1, None), (1, 2)] result = MultiIndex.from_tuples(tuples) expected = MultiIndex.from_tuples([(1, np.nan), (1, 2)]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) - # Test case 8: DataFrame with tuple index - df = pd.DataFrame( - {"col": ["v1", "v2"]}, - index=MultiIndex.from_tuples([("l1",), ("l1", "l2")]) - ) - expected_index = MultiIndex.from_tuples([("l1", np.nan), ("l1", "l2")]) - expected_df = pd.DataFrame( - {"col": ["v1", "v2"]}, - index=expected_index - ) - tm.assert_frame_equal(df, expected_df) + # Test case 6: Mixed types + tuples = [(1, "a"), (1,), (2, "b", "c")] + result = MultiIndex.from_tuples(tuples) + expected = MultiIndex.from_tuples([ + (1, "a", np.nan), + (1, np.nan, np.nan), + (2, "b", "c") + ]) + tm.assert_index_equal(result, expected, exact=True) From e51b274083d2665435a604f244b81d244e86776f Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 20:45:38 +0530 Subject: [PATCH 06/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4e826f9d8d576..118fbdbcff76c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -567,16 +567,16 @@ def from_tuples( return cls.from_arrays(arrays, sortorder=sortorder, names=names) # Convert to list and normalize - tuples_list = list(tuples) - max_length = max(len(t) if isinstance(t, tuple) else 1 for t in tuples_list) - - result_tuples = [] - for t in tuples_list: - if not isinstance(t, tuple): - t = (t,) - result_tuples.append(t + (np.nan,) * (max_length - len(t))) - - arrays = list(lib.to_object_array_tuples(result_tuples).T) + tuples_list = [t if isinstance(t, tuple) else (t,) for t in tuples] + if not tuples_list: + arrays = [] + else: + max_length = max(len(t) for t in tuples_list) + result_tuples = [ + t + (np.nan,) * (max_length - len(t)) for t in tuples_list + ] + arrays = list(lib.to_object_array_tuples(result_tuples).T) + return cls.from_arrays(arrays, sortorder=sortorder, names=names) @classmethod From 0557c548f9356f2b8619a9c5847d57aa65c2c931 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 20:53:27 +0530 Subject: [PATCH 07/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 118fbdbcff76c..b315167838c01 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -6,6 +6,7 @@ Generator, Hashable, Iterable, + Mapping, Sequence, List, ) From f4ed47a2077225f4a7dbd2222b309d9db164dab2 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 21:06:34 +0530 Subject: [PATCH 08/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 95 +++++++----------------------------- 1 file changed, 17 insertions(+), 78 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b315167838c01..12b020655f0b8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1,62 +1,37 @@ from __future__ import annotations -from collections.abc import ( +from typing import ( + TYPE_CHECKING, + Any, Callable, Collection, - Generator, Hashable, Iterable, - Mapping, - Sequence, + Iterator, List, -) -from functools import wraps -from sys import getsizeof -from typing import ( - TYPE_CHECKING, - Any, Literal, + Mapping, + Sequence, cast, - ArrayLike, + overload, ) -import warnings import numpy as np -from pandas._config import get_option - -from pandas._libs import ( - algos as libalgos, - index as libindex, - lib, -) -from pandas._libs.hashtable import duplicated +from pandas._libs import lib +from pandas._libs.hashtable import duplicated_int64 from pandas._typing import ( - AnyAll, - AnyArrayLike, + ArrayLike, Axis, - DropKeep, DtypeObj, F, - IgnoreRaise, - IndexLabel, - IndexT, - Scalar, - Self, Shape, npt, ) -from pandas.compat.numpy import function as nv -from pandas.errors import ( - InvalidIndexError, - PerformanceWarning, - UnsortedIndexError, -) +from pandas.errors import InvalidIndexError from pandas.util._decorators import ( - Appender, cache_readonly, doc, - set_module, ) from pandas.util._exceptions import find_stack_level @@ -64,42 +39,17 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, - is_hashable, - is_integer, - is_iterator, + is_categorical_dtype, + is_extension_array_dtype, is_list_like, is_object_dtype, - is_scalar, - is_string_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ( - CategoricalDtype, - ExtensionDtype, -) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCSeries, -) -from pandas.core.dtypes.inference import is_array_like -from pandas.core.dtypes.missing import ( - array_equivalent, - isna, -) +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.missing import array_equivalent, isna import pandas.core.algorithms as algos -from pandas.core.array_algos.putmask import validate_putmask -from pandas.core.arrays import ( - Categorical, - ExtensionArray, -) -from pandas.core.arrays.categorical import ( - factorize_from_iterables, - recode_for_categories, -) -import pandas.core.common as com -from pandas.core.construction import sanitize_array -import pandas.core.indexes.base as ibase +from pandas.core.arrays.categorical import Categorical from pandas.core.indexes.base import ( Index, _index_shared_docs, @@ -107,20 +57,9 @@ get_unanimous_names, ) from pandas.core.indexes.frozen import FrozenList -from pandas.core.ops.invalid import make_invalid_op -from pandas.core.sorting import ( - get_group_index, - lexsort_indexer, -) - -from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: - from pandas import ( - CategoricalIndex, - DataFrame, - Series, - ) + from pandas import DataFrame _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( From af99a2518c9506b1c4f40a51342bddd9f4c31dd3 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 21:16:02 +0530 Subject: [PATCH 09/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 158 +++++++++++++++++++++++------------ 1 file changed, 103 insertions(+), 55 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 12b020655f0b8..26fdf3aec1c27 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1,37 +1,59 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, +from collections.abc import ( Callable, Collection, + Generator, Hashable, Iterable, - Iterator, - List, - Literal, - Mapping, Sequence, +) +from functools import wraps +from sys import getsizeof +from typing import ( + TYPE_CHECKING, + Any, + Literal, cast, - overload, ) +import warnings import numpy as np -from pandas._libs import lib -from pandas._libs.hashtable import duplicated_int64 +from pandas._config import get_option + +from pandas._libs import ( + algos as libalgos, + index as libindex, + lib, +) +from pandas._libs.hashtable import duplicated from pandas._typing import ( - ArrayLike, + AnyAll, + AnyArrayLike, Axis, + DropKeep, DtypeObj, F, + IgnoreRaise, + IndexLabel, + IndexT, + Scalar, + Self, Shape, npt, ) -from pandas.errors import InvalidIndexError +from pandas.compat.numpy import function as nv +from pandas.errors import ( + InvalidIndexError, + PerformanceWarning, + UnsortedIndexError, +) from pandas.util._decorators import ( + Appender, cache_readonly, doc, + set_module, ) from pandas.util._exceptions import find_stack_level @@ -39,17 +61,42 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, - is_categorical_dtype, - is_extension_array_dtype, + is_hashable, + is_integer, + is_iterator, is_list_like, is_object_dtype, + is_scalar, + is_string_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.missing import array_equivalent, isna +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.inference import is_array_like +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) import pandas.core.algorithms as algos -from pandas.core.arrays.categorical import Categorical +from pandas.core.array_algos.putmask import validate_putmask +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) +from pandas.core.arrays.categorical import ( + factorize_from_iterables, + recode_for_categories, +) +import pandas.core.common as com +from pandas.core.construction import sanitize_array +import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, _index_shared_docs, @@ -57,9 +104,20 @@ get_unanimous_names, ) from pandas.core.indexes.frozen import FrozenList +from pandas.core.ops.invalid import make_invalid_op +from pandas.core.sorting import ( + get_group_index, + lexsort_indexer, +) + +from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: - from pandas import DataFrame + from pandas import ( + CategoricalIndex, + DataFrame, + Series, + ) _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -468,54 +526,44 @@ def from_tuples( ) -> MultiIndex: """ Convert list of tuples to MultiIndex. - - Parameters - ---------- - tuples : list / sequence of tuple-likes - Each tuple is the index of one row/column. - sortorder : int or None - Level of sortedness (must be lexicographically sorted by that level). - names : list / sequence of str, optional - Names for the levels in the index. - - Returns - ------- - MultiIndex """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") - - # Handle empty tuples case first - if isinstance(tuples, (list, tuple)) and len(tuples) == 0: - if names is None: - raise TypeError("Cannot infer number of levels from empty list") - names_seq = cast(Sequence[Hashable], names) - arrays: List[ArrayLike] = [[]] * len(names_seq) - return cls.from_arrays(arrays, sortorder=sortorder, names=names) - - # Convert iterator to list + if is_iterator(tuples): tuples = list(tuples) - + tuples = cast(Collection[tuple[Hashable, ...]], tuples) - # Handle numpy array or Index - if isinstance(tuples, (np.ndarray, Index)): + # handling the empty tuple cases + if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): + codes = [np.zeros(len(tuples))] + levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] + return cls( + levels=levels, + codes=codes, + sortorder=sortorder, + names=names, + verify_integrity=False, + ) + + arrays: list[Sequence[Hashable]] + if len(tuples) == 0: + if names is None: + raise TypeError("Cannot infer number of levels from empty list") + # error: Argument 1 to "len" has incompatible type "Hashable"; + # expected "Sized" + arrays = [[]] * len(names) # type: ignore[arg-type] + elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = np.asarray(tuples._values) - arrays = list(lib.tuples_to_object_array(tuples).T) - return cls.from_arrays(arrays, sortorder=sortorder, names=names) - # Convert to list and normalize - tuples_list = [t if isinstance(t, tuple) else (t,) for t in tuples] - if not tuples_list: - arrays = [] + arrays = list(lib.tuples_to_object_array(tuples).T) + elif isinstance(tuples, list): + arrays = list(lib.to_object_array_tuples(tuples).T) else: - max_length = max(len(t) for t in tuples_list) - result_tuples = [ - t + (np.nan,) * (max_length - len(t)) for t in tuples_list - ] - arrays = list(lib.to_object_array_tuples(result_tuples).T) + arrs = zip(*tuples) + arrays = cast(list[Sequence[Hashable]], arrs) return cls.from_arrays(arrays, sortorder=sortorder, names=names) From 30b27508bb903cb95d4477ce4c88ea6ef09c5804 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 21:21:43 +0530 Subject: [PATCH 10/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 49 +++++++++++++----------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 26fdf3aec1c27..9c6dacdcfcb3f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -7,6 +7,7 @@ Hashable, Iterable, Sequence, + List, ) from functools import wraps from sys import getsizeof @@ -15,6 +16,7 @@ Any, Literal, cast, + ArrayLike, ) import warnings @@ -517,7 +519,7 @@ def from_arrays( ) @classmethod - @names_compat + @doc(doc_create_index) def from_tuples( cls, tuples: Iterable[tuple[Hashable, ...]], @@ -529,41 +531,24 @@ def from_tuples( """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") - - if is_iterator(tuples): - tuples = list(tuples) - - tuples = cast(Collection[tuple[Hashable, ...]], tuples) - - # handling the empty tuple cases - if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): - codes = [np.zeros(len(tuples))] - levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] - return cls( - levels=levels, - codes=codes, - sortorder=sortorder, - names=names, - verify_integrity=False, - ) - arrays: list[Sequence[Hashable]] - if len(tuples) == 0: + if isinstance(tuples, (list, tuple)) and len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") - # error: Argument 1 to "len" has incompatible type "Hashable"; - # expected "Sized" - arrays = [[]] * len(names) # type: ignore[arg-type] - elif isinstance(tuples, (np.ndarray, Index)): - if isinstance(tuples, Index): - tuples = np.asarray(tuples._values) - - arrays = list(lib.tuples_to_object_array(tuples).T) - elif isinstance(tuples, list): - arrays = list(lib.to_object_array_tuples(tuples).T) + names_seq = cast(Sequence[Hashable], names) + arrays: List[ArrayLike] = [[]] * len(names_seq) + return cls.from_arrays(arrays, sortorder=sortorder, names=names) + + # Convert to list and normalize + tuples_list = [t if isinstance(t, tuple) else (t,) for t in tuples] + if not tuples_list: + arrays = [] else: - arrs = zip(*tuples) - arrays = cast(list[Sequence[Hashable]], arrs) + max_length = max(len(t) for t in tuples_list) + result_tuples = [ + t + (np.nan,) * (max_length - len(t)) for t in tuples_list + ] + arrays = list(lib.to_object_array_tuples(result_tuples).T) return cls.from_arrays(arrays, sortorder=sortorder, names=names) From ca62d46389c74971586005f81fbbc1386274274e Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 21:29:36 +0530 Subject: [PATCH 11/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9c6dacdcfcb3f..a8241a4b796f3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -6,6 +6,7 @@ Generator, Hashable, Iterable, + Mapping, Sequence, List, ) @@ -17,6 +18,7 @@ Literal, cast, ArrayLike, + overload, ) import warnings @@ -29,7 +31,7 @@ index as libindex, lib, ) -from pandas._libs.hashtable import duplicated +from pandas._libs.hashtable import duplicated, duplicated_int64 from pandas._typing import ( AnyAll, AnyArrayLike, From 4c190543ce5ebf60eacc6a32870147bfca1cd621 Mon Sep 17 00:00:00 2001 From: mansoora Date: Sat, 15 Feb 2025 21:36:59 +0530 Subject: [PATCH 12/12] fix for #60695 fix Series constructor dropping key levels when keys have varying entry counts --- pandas/core/indexes/multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a8241a4b796f3..50d7c1e5d752a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -9,6 +9,7 @@ Mapping, Sequence, List, + Iterator, ) from functools import wraps from sys import getsizeof