Skip to content

ENH: MultiIndex.from_product infers names from inputs if not explicitly provided #28417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 18, 2019
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Enhancements
Other enhancements
^^^^^^^^^^^^^^^^^^

-
- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`)
-

.. _whatsnew_1000.api_breaking:
Expand Down Expand Up @@ -63,7 +63,7 @@ Other API changes
^^^^^^^^^^^^^^^^^

- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`)
-
- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`)
-

.. _whatsnew_1000.deprecations:
Expand Down
15 changes: 11 additions & 4 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
dict(klass="MultiIndex", target_klass="MultiIndex or list of tuples")
)

# Sentinel default for the ``names`` argument: lets the constructors tell
# "names not passed" (infer them from the inputs' ``name`` attributes) apart
# from an explicit ``names=None``.
_no_default_names = object()


class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
"""
Expand Down Expand Up @@ -371,7 +373,7 @@ def _verify_integrity(self, codes=None, levels=None):
return new_codes

@classmethod
def from_arrays(cls, arrays, sortorder=None, names=None):
def from_arrays(cls, arrays, sortorder=None, names=_no_default_names):
"""
Convert arrays to MultiIndex.

Expand Down Expand Up @@ -425,7 +427,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
raise ValueError("all arrays must be same length")

codes, levels = _factorize_from_iterables(arrays)
if names is None:
if names is _no_default_names:
names = [getattr(arr, "name", None) for arr in arrays]

return MultiIndex(
Expand Down Expand Up @@ -496,7 +498,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)

@classmethod
def from_product(cls, iterables, sortorder=None, names=None):
def from_product(cls, iterables, sortorder=None, names=_no_default_names):
"""
Make a MultiIndex from the cartesian product of multiple iterables.

Expand All @@ -508,7 +510,9 @@ def from_product(cls, iterables, sortorder=None, names=None):
Level of sortedness (must be lexicographically sorted by that
level).
names : list / sequence of str, optional
Names for the levels in the index.
Names for the levels in the index. If not provided, these
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a ``versionadded`` directive here (1.0)?

Copy link
Contributor Author

@christopherzimmerman christopherzimmerman Sep 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do. Would it make sense to break it out into a ``.. versionchanged::`` directive?

names : list / sequence of str, optional
    Names for the levels in the index.

    .. versionchanged:: 1.0.0

       If not explicitly provided, names will be inferred from the
       elements of iterables if an element has a ``name`` attribute.

will be inferred from the elements of iterables if an element
has a name attribute.

Returns
-------
Expand Down Expand Up @@ -542,6 +546,9 @@ def from_product(cls, iterables, sortorder=None, names=None):
iterables = list(iterables)

codes, levels = _factorize_from_iterables(iterables)
if names is _no_default_names:
names = [getattr(it, "name", None) for it in iterables]

codes = cartesian_product(codes)
return MultiIndex(levels, codes, sortorder=sortorder, names=names)

Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/indexes/multi/test_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,19 @@ def test_from_arrays_different_lengths(idx1, idx2):
MultiIndex.from_arrays([idx1, idx2])


def test_from_arrays_respects_none_names():
    # GH27292
    # Both inputs carry a name, but an explicit ``names=None`` must win.
    numbers = pd.Series([1, 2, 3], name="foo")
    letters = pd.Series(["a", "b", "c"], name="bar")
    arrays = [numbers, letters]

    levels = [[1, 2, 3], ["a", "b", "c"]]
    codes = [[0, 1, 2], [0, 1, 2]]
    expected = MultiIndex(levels=levels, codes=codes, names=None)

    result = MultiIndex.from_arrays(arrays, names=None)

    tm.assert_index_equal(result, expected)


# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
Expand Down Expand Up @@ -539,6 +552,43 @@ def test_from_product_iterator():
MultiIndex.from_product(0)


@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            pd.Series([1, 2, 3], name="foo"),
            pd.Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (pd.Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    # GH27292
    # With ``names`` omitted, each level name comes from the matching input's
    # ``name`` attribute, falling back to None for inputs without one.
    levels = [[1, 2, 3], ["a", "b"]]
    codes = [[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
    expected = MultiIndex(levels=levels, codes=codes, names=expected_names)

    result = MultiIndex.from_product([a, b])

    tm.assert_index_equal(result, expected)


def test_from_product_respects_none_names():
    # GH27292
    # Both inputs carry a name, but an explicit ``names=None`` must win.
    numbers = pd.Series([1, 2, 3], name="foo")
    letters = pd.Series(["a", "b"], name="bar")
    iterables = [numbers, letters]

    levels = [[1, 2, 3], ["a", "b"]]
    codes = [[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
    expected = MultiIndex(levels=levels, codes=codes, names=None)

    result = MultiIndex.from_product(iterables, names=None)

    tm.assert_index_equal(result, expected)


def test_create_index_existing_name(idx):

# GH11193, when an existing index is passed, and a new name is not
Expand Down