Skip to content

Commit 029982e

Browse files
BUG: Fix aligning a DataFrame with a Series with MultiIndex
When aligning a DataFrame to a Series, we currently use Series.reindex() to broadcast the Series data to the new index. That introduces NaNs whenever the rows of the new index are not identical to the existing ones, and they never are identical when the join introduces a new MultiIndex level. This patch instead uses the same approach as for aligning a Series to another Series. Because DataFrame has no _reindex_indexer method, we have to replicate part of Series._reindex_indexer for the DataFrame case.
1 parent 53b3dd5 commit 029982e

File tree

4 files changed

+174
-17
lines changed

4 files changed

+174
-17
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ Reshaping
432432
^^^^^^^^^
433433
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
434434
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
435+
- Bug in :meth:`DataFrame.align` when aligning a :class:`DataFrame` with a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
435436
-
436437

437438
Sparse

pandas/core/generic.py

+36-17
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
)
115115

116116
from pandas.core import (
117+
algorithms,
117118
arraylike,
118119
indexing,
119120
missing,
@@ -8975,10 +8976,14 @@ def _align_series(
89758976

89768977
is_series = isinstance(self, ABCSeries)
89778978

8979+
if (not is_series and axis is None) or axis not in [None, 0, 1]:
8980+
raise ValueError("Must specify axis=0 or 1")
8981+
8982+
if is_series and axis == 1:
8983+
raise ValueError("cannot align series to a series other than axis 0")
8984+
89788985
# series/series compat, other must always be a Series
8979-
if is_series:
8980-
if axis:
8981-
raise ValueError("cannot align series to a series other than axis 0")
8986+
if not axis:
89828987

89838988
# equal
89848989
if self.index.equals(other.index):
@@ -8988,26 +8993,40 @@ def _align_series(
89888993
other.index, how=join, level=level, return_indexers=True
89898994
)
89908995

8991-
left = self._reindex_indexer(join_index, lidx, copy)
8996+
if is_series:
8997+
left = self._reindex_indexer(join_index, lidx, copy)
8998+
elif join_index is None:
8999+
left = self.copy() if copy else self
9000+
else:
9001+
data = {
9002+
c: algorithms.take_nd(
9003+
self[c]._values,
9004+
lidx,
9005+
allow_fill=True,
9006+
fill_value=None,
9007+
)
9008+
for c in self.columns
9009+
}
9010+
left = self._constructor(
9011+
data=data, columns=self.columns, index=join_index
9012+
)
9013+
89929014
right = other._reindex_indexer(join_index, ridx, copy)
89939015

89949016
else:
9017+
89959018
# one has > 1 ndim
89969019
fdata = self._mgr
8997-
if axis in [0, 1]:
8998-
join_index = self.axes[axis]
8999-
lidx, ridx = None, None
9000-
if not join_index.equals(other.index):
9001-
join_index, lidx, ridx = join_index.join(
9002-
other.index, how=join, level=level, return_indexers=True
9003-
)
9004-
9005-
if lidx is not None:
9006-
bm_axis = self._get_block_manager_axis(axis)
9007-
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
9020+
join_index = self.axes[1]
9021+
lidx, ridx = None, None
9022+
if not join_index.equals(other.index):
9023+
join_index, lidx, ridx = join_index.join(
9024+
other.index, how=join, level=level, return_indexers=True
9025+
)
90089026

9009-
else:
9010-
raise ValueError("Must specify axis=0 or 1")
9027+
if lidx is not None:
9028+
bm_axis = self._get_block_manager_axis(1)
9029+
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
90119030

90129031
if copy and fdata is self._mgr:
90139032
fdata = fdata.copy()

pandas/tests/frame/methods/test_align.py

+23
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,29 @@ def test_align_series_combinations(self):
243243
tm.assert_series_equal(res1, exp2)
244244
tm.assert_frame_equal(res2, exp1)
245245

246+
def test_multiindex_align_to_series_with_common_index_level(self):
247+
# GH-46001
248+
foo_index = Index([1, 2, 3], name="foo")
249+
bar_index = Index([1, 2], name="bar")
250+
251+
series = Series([1, 2], index=bar_index, name="foo_series")
252+
df = DataFrame(
253+
{"col": np.arange(6)},
254+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
255+
)
256+
257+
expected = Series([1, 2] * 3, index=df.index, name="foo_series")
258+
_, result = df.align(series, axis=0)
259+
260+
tm.assert_series_equal(result, expected)
261+
262+
def test_missing_axis_specification_exception(self):
    """Aligning a frame with a Series without ``axis`` raises ValueError.

    The call ``df.align(s)`` passes no ``axis`` argument; the test requires a
    ``ValueError`` whose message matches ``"axis=0 or 1"``.
    """
    df = DataFrame(np.arange(50).reshape((10, 5)))
    s = Series(np.arange(5))

    with pytest.raises(ValueError, match=r"axis=0 or 1"):
        df.align(s)
268+
246269
def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
247270
aa, ab = a.align(
248271
b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis

pandas/tests/frame/test_arithmetic.py

+114
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,120 @@ def test_frame_single_columns_object_sum_axis_1():
10721072
tm.assert_series_equal(result, expected)
10731073

10741074

1075+
def test_frame_multi_index_operations():
    """Add a Series whose MultiIndex levels are a reordered subset of the frame's.

    The frame is indexed by ``(scen, mod, id)`` and the Series by
    ``(mod, scen)``.  ``df.add(s, axis=0)`` is expected to apply the single
    Series value to every ``id`` row and to leave the frame's index unchanged.
    """
    # GH 43321
    index = MultiIndex.from_product(
        [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
    )
    df = DataFrame({2010: [1, 2, 3], 2020: [3, 4, 5]}, index=index)

    s = Series(
        [0.4],
        index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
    )

    expected = DataFrame(
        {2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
        index=index,
    )
    result = df.add(s, axis=0)

    tm.assert_frame_equal(result, expected)
1098+
1099+
1100+
def test_frame_multi_index_operations_series_index_to_frame_index():
    """Add a Series that has one more MultiIndex level than the frame.

    The frame has a single ``(scen, mod)`` row; the Series has three rows
    indexed by ``(scen, mod, id)``.  ``df.add(s, axis=0)`` is expected to
    return a frame on the Series' three-level index, each row being the
    frame's single row plus the corresponding Series value.
    """
    # GH 43321
    df = DataFrame(
        {2010: [1], 2020: [3]},
        index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]),
    )

    s = Series(
        [10.0, 20.0, 30.0],
        index=MultiIndex.from_product(
            [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
        ),
    )

    expected = DataFrame(
        {2010: [11.0, 21.0, 31.0], 2020: [13.0, 23.0, 33.0]},
        index=MultiIndex.from_product(
            [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
        ),
    )
    result = df.add(s, axis=0)

    tm.assert_frame_equal(result, expected)
1123+
1124+
1125+
def test_frame_multi_index_operations_no_align():
    """Add a Series whose shared-level values match no row of the frame.

    The frame's rows all have ``mod == "b"`` while the Series' only entry has
    ``mod == "c"``.  ``df.add(s, axis=0)`` is expected to return an all-NaN
    frame whose index holds the frame's three rows plus one ``("a", "c", NaN)``
    row for the unmatched Series entry.
    """
    df = DataFrame(
        {2010: [1, 2, 3], 2020: [3, 4, 5]},
        index=MultiIndex.from_product(
            [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
        ),
    )

    s = Series(
        [0.4],
        index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]),
    )

    expected = DataFrame(
        {2010: np.nan, 2020: np.nan},
        index=MultiIndex.from_tuples(
            [
                ("a", "b", 0),
                ("a", "b", 1),
                ("a", "b", 2),
                # The Series entry has no "id" value, hence NaN in that level.
                ("a", "c", np.nan),
            ],
            names=["scen", "mod", "id"],
        ),
    )
    result = df.add(s, axis=0)

    tm.assert_frame_equal(result, expected)
1153+
1154+
1155+
def test_frame_multi_index_operations_part_align():
    """Add a Series that matches only some rows of a MultiIndex frame.

    Two frame rows have ``mod == "b"`` and one has ``mod == "c"``; the Series'
    only entry has ``mod == "b"``.  ``df.add(s, axis=0)`` is expected to add
    the Series value to the two matching rows, yield NaN for the ``"c"`` row,
    and keep the frame's index unchanged.
    """
    index = MultiIndex.from_tuples(
        [
            ("a", "b", 0),
            ("a", "b", 1),
            ("a", "c", 2),
        ],
        names=["scen", "mod", "id"],
    )
    df = DataFrame({2010: [1, 2, 3], 2020: [3, 4, 5]}, index=index)

    s = Series(
        [0.4],
        index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
    )

    expected = DataFrame(
        {2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]},
        index=index,
    )
    result = df.add(s, axis=0)

    tm.assert_frame_equal(result, expected)
1187+
1188+
10751189
# -------------------------------------------------------------------
10761190
# Unsorted
10771191
# These arithmetic tests were previously in other files, eventually

0 commit comments

Comments
 (0)