diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bc77553924dfa..2a5d9286a7da2 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -27,6 +27,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) - :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) - The :ref:`integer dtype <integer_na>` with support for missing values can now be converted to ``pyarrow`` (>= 0.15.0), which means that it is supported in writing to the Parquet file format @@ -67,7 +68,7 @@ Other API changes ^^^^^^^^^^^^^^^^^ - :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) -- +- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`) - .. _whatsnew_1000.deprecations: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 761862b9f30e9..3273c4f8cd13b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -60,6 +60,8 @@ dict(klass="MultiIndex", target_klass="MultiIndex or list of tuples") ) +_no_default_names = object() + class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): """ @@ -371,7 +373,7 @@ def _verify_integrity(self, codes=None, levels=None): return new_codes @classmethod - def from_arrays(cls, arrays, sortorder=None, names=None): + def from_arrays(cls, arrays, sortorder=None, names=_no_default_names): """ Convert arrays to MultiIndex. 
@@ -425,7 +427,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None): raise ValueError("all arrays must be same length") codes, levels = _factorize_from_iterables(arrays) - if names is None: + if names is _no_default_names: names = [getattr(arr, "name", None) for arr in arrays] return MultiIndex( @@ -496,7 +498,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) @classmethod - def from_product(cls, iterables, sortorder=None, names=None): + def from_product(cls, iterables, sortorder=None, names=_no_default_names): """ Make a MultiIndex from the cartesian product of multiple iterables. @@ -510,6 +512,11 @@ def from_product(cls, iterables, sortorder=None, names=None): names : list / sequence of str, optional Names for the levels in the index. + .. versionchanged:: 1.0.0 + + If not explicitly provided, names will be inferred from the + elements of iterables if an element has a name attribute. + Returns ------- index : MultiIndex @@ -542,6 +549,9 @@ def from_product(cls, iterables, sortorder=None, names=None): iterables = list(iterables) codes, levels = _factorize_from_iterables(iterables) + if names is _no_default_names: + names = [getattr(it, "name", None) for it in iterables] + codes = cartesian_product(codes) return MultiIndex(levels, codes, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 86c9ee3455d0b..9472d539537ba 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -348,6 +348,19 @@ def test_from_arrays_different_lengths(idx1, idx2): MultiIndex.from_arrays([idx1, idx2]) +def test_from_arrays_respects_none_names(): + # GH27292 + a = pd.Series([1, 2, 3], name="foo") + b = pd.Series(["a", "b", "c"], name="bar") + + result = MultiIndex.from_arrays([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], 
["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None + ) + + tm.assert_index_equal(result, expected) + + # ---------------------------------------------------------------------------- # from_tuples # ---------------------------------------------------------------------------- @@ -539,6 +552,43 @@ def test_from_product_iterator(): MultiIndex.from_product(0) +@pytest.mark.parametrize( + "a, b, expected_names", + [ + ( + pd.Series([1, 2, 3], name="foo"), + pd.Series(["a", "b"], name="bar"), + ["foo", "bar"], + ), + (pd.Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), + ([1, 2, 3], ["a", "b"], None), + ], +) +def test_from_product_infer_names(a, b, expected_names): + # GH27292 + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=expected_names, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_respects_none_names(): + # GH27292 + a = pd.Series([1, 2, 3], name="foo") + b = pd.Series(["a", "b"], name="bar") + + result = MultiIndex.from_product([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=None, + ) + tm.assert_index_equal(result, expected) + + def test_create_index_existing_name(idx): # GH11193, when an existing index is passed, and a new name is not