Skip to content

BUG: Inconsistent conversion of missing column names #44878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,7 @@ Other
- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`)
- Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`)
- Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`)
- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`)

.. ***DO NOT USE THIS SECTION***

Expand Down
21 changes: 21 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
Any,
Callable,
Collection,
Hashable,
Iterable,
Iterator,
Sequence,
cast,
overload,
)
Expand Down Expand Up @@ -604,3 +606,22 @@ def is_builtin_func(arg):
otherwise return the arg
"""
return _builtin_table.get(arg, arg)


def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Substitute a positional placeholder for every missing (``None``) name.

    A ``None`` entry is replaced by the string ``level_n``, where ``n`` is the
    zero-based position of that entry within ``names`` (not a running count
    of the missing entries). All other entries are passed through unchanged.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        Names, any of which may be ``None``.

    Returns
    -------
    list
        A new list of the same length as ``names`` with each ``None``
        replaced by its ``level_n`` placeholder.
    """
    filled: list[Hashable] = []
    for position, label in enumerate(names):
        if label is None:
            # The placeholder is keyed on the entry's position in ``names``.
            label = f"level_{position}"
        filled.append(label)
    return filled
11 changes: 2 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2370,11 +2370,7 @@ def to_records(
index_names = list(self.index.names)

if isinstance(self.index, MultiIndex):
count = 0
for i, n in enumerate(index_names):
if n is None:
index_names[i] = f"level_{count}"
count += 1
index_names = com.fill_missing_names(index_names)
elif index_names[0] is None:
index_names = ["index"]

Expand Down Expand Up @@ -5796,10 +5792,7 @@ class max type
if not drop:
to_insert: Iterable[tuple[Any, Any | None]]
if isinstance(self.index, MultiIndex):
names = [
(n if n is not None else f"level_{i}")
for i, n in enumerate(self.index.names)
]
names = com.fill_missing_names(self.index.names)
to_insert = zip(self.index.levels, self.index.codes)
else:
default = "index" if "index" not in self else "level_0"
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,11 +736,9 @@ def dtypes(self) -> Series:
"""
from pandas import Series

names = com.fill_missing_names([level.name for level in self.levels])
return Series(
{
f"level_{idx}" if level.name is None else level.name: level.dtype
for idx, level in enumerate(self.levels)
}
{names[idx]: level.dtype for idx, level in enumerate(self.levels)}
)

def __len__(self) -> int:
Expand Down
6 changes: 1 addition & 5 deletions pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,7 @@ def set_default_names(data):

data = data.copy()
if data.index.nlevels > 1:
names = [
name if name is not None else f"level_{i}"
for i, name in enumerate(data.index.names)
]
data.index.names = names
data.index.names = com.fill_missing_names(data.index.names)
else:
data.index.name = data.index.name or "index"
return data
Expand Down
4 changes: 1 addition & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3377,9 +3377,7 @@ def validate_multiindex(
validate that we can store the multi-index; reset and return the
new object
"""
levels = [
l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names)
]
levels = com.fill_missing_names(obj.index.names)
try:
reset_obj = obj.reset_index()
except ValueError as err:
Expand Down
6 changes: 2 additions & 4 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
Series,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.tools.datetimes import to_datetime
from pandas.util.version import Version

Expand Down Expand Up @@ -1010,10 +1011,7 @@ def _index_name(self, index, index_label):
):
return ["index"]
else:
return [
l if l is not None else f"level_{i}"
for i, l in enumerate(self.frame.index.names)
]
return com.fill_missing_names(self.frame.index.names)

# for reading: index=(list of) string to specify column to set as index
elif isinstance(index, str):
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/frame/methods/test_to_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,16 @@ def test_to_records_index_name(self):

df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")])
df.index.names = ["A", None]
rs = df.to_records()
assert "level_0" in rs.dtype.fields
result = df.to_records()
expected = np.rec.fromarrays(
[np.array(["a", "a", "b"]), np.array(["x", "y", "z"])]
+ [np.asarray(df.iloc[:, i]) for i in range(3)],
dtype={
"names": ["A", "level_1", "0", "1", "2"],
"formats": ["<U1", "<U1", "<f8", "<f8", "<f8"],
},
)
tm.assert_numpy_array_equal(result, expected)

def test_to_records_with_unicode_index(self):
# GH#13172
Expand Down