diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2b1182414ca2f..4b4906254a730 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -856,6 +856,7 @@ Other - Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`) - Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`) - Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`) +- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`) .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/common.py b/pandas/core/common.py index 590296c4b12f5..2ebdfccc88f4e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -18,8 +18,10 @@ Any, Callable, Collection, + Hashable, Iterable, Iterator, + Sequence, cast, overload, ) @@ -604,3 +606,22 @@ def is_builtin_func(arg): otherwise return the arg """ return _builtin_table.get(arg, arg) + + +def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]: + """ + If a name is missing then replace it by level_n, where n is its position in names + + .. versionadded:: 1.4.0 + + Parameters + ---------- + names : list-like + list of column names or None values. + + Returns + ------- + list + list of column names with the None values replaced. 
+ """ + return [f"level_{i}" if name is None else name for i, name in enumerate(names)] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fa5e9dc51419a..b73b064f4dc95 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2370,11 +2370,7 @@ def to_records( index_names = list(self.index.names) if isinstance(self.index, MultiIndex): - count = 0 - for i, n in enumerate(index_names): - if n is None: - index_names[i] = f"level_{count}" - count += 1 + index_names = com.fill_missing_names(index_names) elif index_names[0] is None: index_names = ["index"] @@ -5796,10 +5792,7 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] if isinstance(self.index, MultiIndex): - names = [ - (n if n is not None else f"level_{i}") - for i, n in enumerate(self.index.names) - ] + names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: default = "index" if "index" not in self else "level_0" diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 88b37ffaa9493..a764a49719ad9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -736,11 +736,9 @@ def dtypes(self) -> Series: """ from pandas import Series + names = com.fill_missing_names([level.name for level in self.levels]) return Series( - { - f"level_{idx}" if level.name is None else level.name: level.dtype - for idx, level in enumerate(self.levels) - } + {names[idx]: level.dtype for idx, level in enumerate(self.levels)} ) def __len__(self) -> int: diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 75fd950cd6076..ed33784f44464 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -103,11 +103,7 @@ def set_default_names(data): data = data.copy() if data.index.nlevels > 1: - names = [ - name if name is not None else f"level_{i}" - for i, name in enumerate(data.index.names) - ] - data.index.names = names + data.index.names = 
com.fill_missing_names(data.index.names) else: data.index.name = data.index.name or "index" return data diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 18b2ff3837a15..997a6bfc67dbc 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3377,9 +3377,7 @@ def validate_multiindex( validate that we can store the multi-index; reset and return the new object """ - levels = [ - l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names) - ] + levels = com.fill_missing_names(obj.index.names) try: reset_obj = obj.reset_index() except ValueError as err: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 26869a660f4b4..548bd617a285f 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -44,6 +44,7 @@ Series, ) from pandas.core.base import PandasObject +import pandas.core.common as com from pandas.core.tools.datetimes import to_datetime from pandas.util.version import Version @@ -1010,10 +1011,7 @@ def _index_name(self, index, index_label): ): return ["index"] else: - return [ - l if l is not None else f"level_{i}" - for i, l in enumerate(self.frame.index.names) - ] + return com.fill_missing_names(self.frame.index.names) # for reading: index=(list of) string to specify column to set as index elif isinstance(index, str): diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 2c96cf291c154..a2e94782142ac 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -90,8 +90,16 @@ def test_to_records_index_name(self): df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")]) df.index.names = ["A", None] - rs = df.to_records() - assert "level_0" in rs.dtype.fields + result = df.to_records() + expected = np.rec.fromarrays( + [np.array(["a", "a", "b"]), np.array(["x", "y", "z"])] + + [np.asarray(df.iloc[:, i]) for i in range(3)], + dtype={ + "names": ["A", "level_1", "0", "1", "2"], + "formats": ["