Skip to content

Commit 1593665

Browse files
BUG: Fix aligning a DataFrame with a Series with MultiIndex
When aligning a DataFrame to a Series, we were using Series.reindex() to broadcast the Series data to the new index. That introduces NaNs whenever the rows of the new index are not identical to the existing ones, and they never are when the join introduces a new MultiIndex level. This patch instead uses the same approach as for aligning a Series to another Series. Because DataFrame has no _reindex_indexer method, part of Series._reindex_indexer has to be replicated for the DataFrame case.
1 parent 77d9237 commit 1593665

File tree

4 files changed

+174
-17
lines changed

4 files changed

+174
-17
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ Reshaping
427427
^^^^^^^^^
428428
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
429429
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
430+
- Bug in :meth:`DataFrame.align` when aligning a :class:`DataFrame` with a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
430431
-
431432

432433
Sparse

pandas/core/generic.py

+36-17
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
)
115115

116116
from pandas.core import (
117+
algorithms,
117118
arraylike,
118119
indexing,
119120
missing,
@@ -8963,10 +8964,14 @@ def _align_series(
89638964

89648965
is_series = isinstance(self, ABCSeries)
89658966

8967+
if (not is_series and axis is None) or axis not in [None, 0, 1]:
8968+
raise ValueError("Must specify axis=0 or 1")
8969+
8970+
if is_series and axis == 1:
8971+
raise ValueError("cannot align series to a series other than axis 0")
8972+
89668973
# series/series compat, other must always be a Series
8967-
if is_series:
8968-
if axis:
8969-
raise ValueError("cannot align series to a series other than axis 0")
8974+
if not axis:
89708975

89718976
# equal
89728977
if self.index.equals(other.index):
@@ -8976,26 +8981,40 @@ def _align_series(
89768981
other.index, how=join, level=level, return_indexers=True
89778982
)
89788983

8979-
left = self._reindex_indexer(join_index, lidx, copy)
8984+
if is_series:
8985+
left = self._reindex_indexer(join_index, lidx, copy)
8986+
elif join_index is None:
8987+
left = self.copy() if copy else self
8988+
else:
8989+
data = {
8990+
c: algorithms.take_nd(
8991+
self.__getitem__(c)._values,
8992+
lidx,
8993+
allow_fill=True,
8994+
fill_value=None,
8995+
)
8996+
for c in self.columns
8997+
}
8998+
left = self._constructor(
8999+
data=data, columns=self.columns, index=join_index
9000+
)
9001+
89809002
right = other._reindex_indexer(join_index, ridx, copy)
89819003

89829004
else:
9005+
89839006
# one has > 1 ndim
89849007
fdata = self._mgr
8985-
if axis in [0, 1]:
8986-
join_index = self.axes[axis]
8987-
lidx, ridx = None, None
8988-
if not join_index.equals(other.index):
8989-
join_index, lidx, ridx = join_index.join(
8990-
other.index, how=join, level=level, return_indexers=True
8991-
)
8992-
8993-
if lidx is not None:
8994-
bm_axis = self._get_block_manager_axis(axis)
8995-
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
9008+
join_index = self.axes[1]
9009+
lidx, ridx = None, None
9010+
if not join_index.equals(other.index):
9011+
join_index, lidx, ridx = join_index.join(
9012+
other.index, how=join, level=level, return_indexers=True
9013+
)
89969014

8997-
else:
8998-
raise ValueError("Must specify axis=0 or 1")
9015+
if lidx is not None:
9016+
bm_axis = self._get_block_manager_axis(1)
9017+
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
89999018

90009019
if copy and fdata is self._mgr:
90019020
fdata = fdata.copy()

pandas/tests/frame/methods/test_align.py

+23
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,29 @@ def test_align_series_combinations(self):
243243
tm.assert_series_equal(res1, exp2)
244244
tm.assert_frame_equal(res2, exp1)
245245

246+
def test_multiindex_align_to_series_with_common_index_level(self):
247+
# GH-46001
248+
foo_index = Index([1, 2, 3], name="foo")
249+
bar_index = Index([1, 2], name="bar")
250+
251+
series = Series([1, 2], index=bar_index, name="foo_series")
252+
df = DataFrame(
253+
{"col": np.arange(6)},
254+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
255+
)
256+
257+
expected = Series([1, 2] * 3, index=df.index, name="foo_series")
258+
_, result = df.align(series, axis=0)
259+
260+
tm.assert_series_equal(result, expected)
261+
262+
def test_missing_axis_specification_exception(self):
263+
df = DataFrame(np.arange(50).reshape((10, 5)))
264+
s = Series(np.arange(5))
265+
266+
with pytest.raises(ValueError, match=r"axis=0 or 1"):
267+
df.align(s)
268+
246269
def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
247270
aa, ab = a.align(
248271
b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis

pandas/tests/frame/test_arithmetic.py

+114
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,120 @@ def test_frame_single_columns_object_sum_axis_1():
10721072
tm.assert_series_equal(result, expected)
10731073

10741074

1075+
def test_frame_multi_index_operations():
1076+
# GH 43321
1077+
df = DataFrame(
1078+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
1079+
index=MultiIndex.from_product(
1080+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1081+
),
1082+
)
1083+
1084+
s = Series(
1085+
[0.4],
1086+
index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
1087+
)
1088+
1089+
expected = DataFrame(
1090+
{2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
1091+
index=MultiIndex.from_product(
1092+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1093+
),
1094+
)
1095+
result = df.add(s, axis=0)
1096+
1097+
tm.assert_frame_equal(result, expected)
1098+
1099+
1100+
def test_frame_multi_index_operations_series_index_to_frame_index():
1101+
# GH 43321
1102+
df = DataFrame(
1103+
{2010: [1], 2020: [3]},
1104+
index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]),
1105+
)
1106+
1107+
s = Series(
1108+
[10.0, 20.0, 30.0],
1109+
index=MultiIndex.from_product(
1110+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1111+
),
1112+
)
1113+
1114+
expected = DataFrame(
1115+
{2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]},
1116+
index=MultiIndex.from_product(
1117+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1118+
),
1119+
)
1120+
result = df.add(s, axis=0)
1121+
1122+
tm.assert_frame_equal(result, expected)
1123+
1124+
1125+
def test_frame_multi_index_operations_no_align():
1126+
df = DataFrame(
1127+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
1128+
index=MultiIndex.from_product(
1129+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1130+
),
1131+
)
1132+
1133+
s = Series(
1134+
[0.4],
1135+
index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]),
1136+
)
1137+
1138+
expected = DataFrame(
1139+
{2010: np.nan, 2020: np.nan},
1140+
index=MultiIndex.from_tuples(
1141+
[
1142+
("a", "b", 0),
1143+
("a", "b", 1),
1144+
("a", "b", 2),
1145+
("a", "c", np.nan),
1146+
],
1147+
names=["scen", "mod", "id"],
1148+
),
1149+
)
1150+
result = df.add(s, axis=0)
1151+
1152+
tm.assert_frame_equal(result, expected)
1153+
1154+
1155+
def test_frame_multi_index_operations_part_align():
1156+
df = DataFrame(
1157+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
1158+
index=MultiIndex.from_tuples(
1159+
[
1160+
("a", "b", 0),
1161+
("a", "b", 1),
1162+
("a", "c", 2),
1163+
],
1164+
names=["scen", "mod", "id"],
1165+
),
1166+
)
1167+
1168+
s = Series(
1169+
[0.4],
1170+
index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
1171+
)
1172+
1173+
expected = DataFrame(
1174+
{2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]},
1175+
index=MultiIndex.from_tuples(
1176+
[
1177+
("a", "b", 0),
1178+
("a", "b", 1),
1179+
("a", "c", 2),
1180+
],
1181+
names=["scen", "mod", "id"],
1182+
),
1183+
)
1184+
result = df.add(s, axis=0)
1185+
1186+
tm.assert_frame_equal(result, expected)
1187+
1188+
10751189
# -------------------------------------------------------------------
10761190
# Unsorted
10771191
# These arithmetic tests were previously in other files, eventually

0 commit comments

Comments
 (0)