From 4982c1036729872d02ca0031818e9338ccb66346 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:36:44 -0700 Subject: [PATCH 1/2] clean up _make_concat_multiindex --- pandas/core/reshape/concat.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 35a08e0167924..386d7a24e2bc0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -742,6 +742,12 @@ def _concat_indexes(indexes) -> Index: return indexes[0].append(indexes[1:]) +def validate_unique_levels(levels: list[Index]) -> None: + for level in levels: + if not level.is_unique: + raise ValueError(f"Level values not unique: {level.tolist()}") + + def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex: if (levels is None and isinstance(keys[0], tuple)) or ( levels is not None and len(levels) > 1 @@ -754,6 +760,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde _, levels = factorize_from_iterables(zipped) else: levels = [ensure_index(x) for x in levels] + validate_unique_levels(levels) else: zipped = [keys] if names is None: @@ -763,12 +770,9 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde levels = [ensure_index(keys).unique()] else: levels = [ensure_index(x) for x in levels] + validate_unique_levels(levels) - for level in levels: - if not level.is_unique: - raise ValueError(f"Level values not unique: {level.tolist()}") - - if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): + if not all_indexes_same(indexes): codes_list = [] # things are potentially different sizes, so compute the exact codes From d4ba6ad941a6ca1613cbbe758ac521640b274f42 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:38:00 -0700 Subject: [PATCH 2/2] Inline verify_integrity check --- pandas/core/reshape/concat.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 386d7a24e2bc0..b1f662b6f231f 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -635,16 +635,13 @@ def _get_concat_axis(self) -> Index: indexes, self.keys, self.levels, self.names ) - self._maybe_check_integrity(concat_axis) - - return concat_axis - - def _maybe_check_integrity(self, concat_index: Index) -> None: if self.verify_integrity: - if not concat_index.is_unique: - overlap = concat_index[concat_index.duplicated()].unique() + if not concat_axis.is_unique: + overlap = concat_axis[concat_axis.duplicated()].unique() raise ValueError(f"Indexes have overlapping values: {overlap}") + return concat_axis + def _clean_keys_and_objs( objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],