Skip to content

Commit a769e38

Browse files
authored
BUG: Inconsistent conversion of missing column names (#44878)
1 parent 46ddde7 commit a769e38

File tree

8 files changed

+40
-27
lines changed

8 files changed

+40
-27
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,7 @@ Other
856856
- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`)
857857
- Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`)
858858
- Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`)
859+
- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`)
859860

860861
.. ***DO NOT USE THIS SECTION***
861862

pandas/core/common.py

+21
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
Any,
1919
Callable,
2020
Collection,
21+
Hashable,
2122
Iterable,
2223
Iterator,
24+
Sequence,
2325
cast,
2426
overload,
2527
)
@@ -604,3 +606,22 @@ def is_builtin_func(arg):
604606
otherwise return the arg
605607
"""
606608
return _builtin_table.get(arg, arg)
609+
610+
611+
def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Substitute a positional placeholder for every missing name.

    A name that is None is replaced by ``level_n``, where n is the
    zero-based position of that name in ``names`` (not a running count
    of the missing names). Names that are not None are kept unchanged.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    filled: list[Hashable] = []
    for position, label in enumerate(names):
        if label is None:
            # The placeholder is keyed on the position in ``names`` so that
            # every caller derives the same name for the same level.
            filled.append(f"level_{position}")
        else:
            filled.append(label)
    return filled

pandas/core/frame.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -2370,11 +2370,7 @@ def to_records(
23702370
index_names = list(self.index.names)
23712371

23722372
if isinstance(self.index, MultiIndex):
2373-
count = 0
2374-
for i, n in enumerate(index_names):
2375-
if n is None:
2376-
index_names[i] = f"level_{count}"
2377-
count += 1
2373+
index_names = com.fill_missing_names(index_names)
23782374
elif index_names[0] is None:
23792375
index_names = ["index"]
23802376

@@ -5796,10 +5792,7 @@ class max type
57965792
if not drop:
57975793
to_insert: Iterable[tuple[Any, Any | None]]
57985794
if isinstance(self.index, MultiIndex):
5799-
names = [
5800-
(n if n is not None else f"level_{i}")
5801-
for i, n in enumerate(self.index.names)
5802-
]
5795+
names = com.fill_missing_names(self.index.names)
58035796
to_insert = zip(self.index.levels, self.index.codes)
58045797
else:
58055798
default = "index" if "index" not in self else "level_0"

pandas/core/indexes/multi.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -736,11 +736,9 @@ def dtypes(self) -> Series:
736736
"""
737737
from pandas import Series
738738

739+
names = com.fill_missing_names([level.name for level in self.levels])
739740
return Series(
740-
{
741-
f"level_{idx}" if level.name is None else level.name: level.dtype
742-
for idx, level in enumerate(self.levels)
743-
}
741+
{names[idx]: level.dtype for idx, level in enumerate(self.levels)}
744742
)
745743

746744
def __len__(self) -> int:

pandas/io/json/_table_schema.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,7 @@ def set_default_names(data):
103103

104104
data = data.copy()
105105
if data.index.nlevels > 1:
106-
names = [
107-
name if name is not None else f"level_{i}"
108-
for i, name in enumerate(data.index.names)
109-
]
110-
data.index.names = names
106+
data.index.names = com.fill_missing_names(data.index.names)
111107
else:
112108
data.index.name = data.index.name or "index"
113109
return data

pandas/io/pytables.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3377,9 +3377,7 @@ def validate_multiindex(
33773377
validate that we can store the multi-index; reset and return the
33783378
new object
33793379
"""
3380-
levels = [
3381-
l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names)
3382-
]
3380+
levels = com.fill_missing_names(obj.index.names)
33833381
try:
33843382
reset_obj = obj.reset_index()
33853383
except ValueError as err:

pandas/io/sql.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
Series,
4545
)
4646
from pandas.core.base import PandasObject
47+
import pandas.core.common as com
4748
from pandas.core.tools.datetimes import to_datetime
4849
from pandas.util.version import Version
4950

@@ -1010,10 +1011,7 @@ def _index_name(self, index, index_label):
10101011
):
10111012
return ["index"]
10121013
else:
1013-
return [
1014-
l if l is not None else f"level_{i}"
1015-
for i, l in enumerate(self.frame.index.names)
1016-
]
1014+
return com.fill_missing_names(self.frame.index.names)
10171015

10181016
# for reading: index=(list of) string to specify column to set as index
10191017
elif isinstance(index, str):

pandas/tests/frame/methods/test_to_records.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,16 @@ def test_to_records_index_name(self):
9090

9191
df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")])
9292
df.index.names = ["A", None]
93-
rs = df.to_records()
94-
assert "level_0" in rs.dtype.fields
93+
result = df.to_records()
94+
expected = np.rec.fromarrays(
95+
[np.array(["a", "a", "b"]), np.array(["x", "y", "z"])]
96+
+ [np.asarray(df.iloc[:, i]) for i in range(3)],
97+
dtype={
98+
"names": ["A", "level_1", "0", "1", "2"],
99+
"formats": ["<U1", "<U1", "<f8", "<f8", "<f8"],
100+
},
101+
)
102+
tm.assert_numpy_array_equal(result, expected)
95103

96104
def test_to_records_with_unicode_index(self):
97105
# GH#13172

0 commit comments

Comments
 (0)