Skip to content

Commit 41d248e

Browse files
BUG: Fix aligning a DataFrame with a Series with MultiIndex (#46058)
1 parent 5ab67d7 commit 41d248e

File tree

4 files changed

+243
-17
lines changed

4 files changed

+243
-17
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ Reshaping
455455
^^^^^^^^^
456456
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
457457
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
458+
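- Bug in :meth:`DataFrame.align` when aligning a :class:`DataFrame` that has a :class:`MultiIndex` with a :class:`Series` whose index shares only some of its levels, including a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)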
458459
-
459460

460461
Sparse

pandas/core/generic.py

+33-17
Original file line numberDiff line numberDiff line change
@@ -8991,10 +8991,14 @@ def _align_series(
89918991

89928992
is_series = isinstance(self, ABCSeries)
89938993

8994+
if (not is_series and axis is None) or axis not in [None, 0, 1]:
8995+
raise ValueError("Must specify axis=0 or 1")
8996+
8997+
if is_series and axis == 1:
8998+
raise ValueError("cannot align series to a series other than axis 0")
8999+
89949000
# series/series compat, or a DataFrame aligned on its index (axis=0); other must always be a Series
8995-
if is_series:
8996-
if axis:
8997-
raise ValueError("cannot align series to a series other than axis 0")
9001+
if not axis:
89989002

89999003
# equal
90009004
if self.index.equals(other.index):
@@ -9004,26 +9008,38 @@ def _align_series(
90049008
other.index, how=join, level=level, return_indexers=True
90059009
)
90069010

9007-
left = self._reindex_indexer(join_index, lidx, copy)
9011+
if is_series:
9012+
left = self._reindex_indexer(join_index, lidx, copy)
9013+
elif lidx is None:
9014+
left = self.copy() if copy else self
9015+
else:
9016+
data = algos.take_nd(
9017+
self.values,
9018+
lidx,
9019+
allow_fill=True,
9020+
fill_value=None,
9021+
)
9022+
9023+
left = self._constructor(
9024+
data=data, columns=self.columns, index=join_index
9025+
)
9026+
90089027
right = other._reindex_indexer(join_index, ridx, copy)
90099028

90109029
else:
9030+
90119031
# one has > 1 ndim
90129032
fdata = self._mgr
9013-
if axis in [0, 1]:
9014-
join_index = self.axes[axis]
9015-
lidx, ridx = None, None
9016-
if not join_index.equals(other.index):
9017-
join_index, lidx, ridx = join_index.join(
9018-
other.index, how=join, level=level, return_indexers=True
9019-
)
9020-
9021-
if lidx is not None:
9022-
bm_axis = self._get_block_manager_axis(axis)
9023-
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
9033+
join_index = self.axes[1]
9034+
lidx, ridx = None, None
9035+
if not join_index.equals(other.index):
9036+
join_index, lidx, ridx = join_index.join(
9037+
other.index, how=join, level=level, return_indexers=True
9038+
)
90249039

9025-
else:
9026-
raise ValueError("Must specify axis=0 or 1")
9040+
if lidx is not None:
9041+
bm_axis = self._get_block_manager_axis(1)
9042+
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
90279043

90289044
if copy and fdata is self._mgr:
90299045
fdata = fdata.copy()

pandas/tests/frame/methods/test_align.py

+99
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,105 @@ def test_align_series_combinations(self):
243243
tm.assert_series_equal(res1, exp2)
244244
tm.assert_frame_equal(res2, exp1)
245245

246+
def test_multiindex_align_to_series_with_common_index_level(self):
247+
# GH-46001
248+
foo_index = Index([1, 2, 3], name="foo")
249+
bar_index = Index([1, 2], name="bar")
250+
251+
series = Series([1, 2], index=bar_index, name="foo_series")
252+
df = DataFrame(
253+
{"col": np.arange(6)},
254+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
255+
)
256+
257+
expected_r = Series([1, 2] * 3, index=df.index, name="foo_series")
258+
result_l, result_r = df.align(series, axis=0)
259+
260+
tm.assert_frame_equal(result_l, df)
261+
tm.assert_series_equal(result_r, expected_r)
262+
263+
def test_multiindex_align_to_series_with_common_index_level_missing_in_left(self):
264+
# GH-46001
265+
foo_index = Index([1, 2, 3], name="foo")
266+
bar_index = Index([1, 2], name="bar")
267+
268+
series = Series(
269+
[1, 2, 3, 4], index=Index([1, 2, 3, 4], name="bar"), name="foo_series"
270+
)
271+
df = DataFrame(
272+
{"col": np.arange(6)},
273+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
274+
)
275+
276+
expected_r = Series([1, 2] * 3, index=df.index, name="foo_series")
277+
result_l, result_r = df.align(series, axis=0)
278+
279+
tm.assert_frame_equal(result_l, df)
280+
tm.assert_series_equal(result_r, expected_r)
281+
282+
def test_multiindex_align_to_series_with_common_index_level_missing_in_right(self):
283+
# GH-46001
284+
foo_index = Index([1, 2, 3], name="foo")
285+
bar_index = Index([1, 2, 3, 4], name="bar")
286+
287+
series = Series([1, 2], index=Index([1, 2], name="bar"), name="foo_series")
288+
df = DataFrame(
289+
{"col": np.arange(12)},
290+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
291+
)
292+
293+
expected_r = Series(
294+
[1, 2, np.nan, np.nan] * 3, index=df.index, name="foo_series"
295+
)
296+
result_l, result_r = df.align(series, axis=0)
297+
298+
tm.assert_frame_equal(result_l, df)
299+
tm.assert_series_equal(result_r, expected_r)
300+
301+
def test_multiindex_align_to_series_with_common_index_level_missing_in_both(self):
302+
# GH-46001
303+
foo_index = Index([1, 2, 3], name="foo")
304+
bar_index = Index([1, 3, 4], name="bar")
305+
306+
series = Series(
307+
[1, 2, 3], index=Index([1, 2, 4], name="bar"), name="foo_series"
308+
)
309+
df = DataFrame(
310+
{"col": np.arange(9)},
311+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
312+
)
313+
314+
expected_r = Series([1, np.nan, 3] * 3, index=df.index, name="foo_series")
315+
result_l, result_r = df.align(series, axis=0)
316+
317+
tm.assert_frame_equal(result_l, df)
318+
tm.assert_series_equal(result_r, expected_r)
319+
320+
def test_multiindex_align_to_series_with_common_index_level_non_unique_cols(self):
321+
# GH-46001
322+
foo_index = Index([1, 2, 3], name="foo")
323+
bar_index = Index([1, 2], name="bar")
324+
325+
series = Series([1, 2], index=bar_index, name="foo_series")
326+
df = DataFrame(
327+
np.arange(18).reshape(6, 3),
328+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
329+
)
330+
df.columns = ["cfoo", "cbar", "cfoo"]
331+
332+
expected = Series([1, 2] * 3, index=df.index, name="foo_series")
333+
result_left, result_right = df.align(series, axis=0)
334+
335+
tm.assert_series_equal(result_right, expected)
336+
tm.assert_index_equal(result_left.columns, df.columns)
337+
338+
def test_missing_axis_specification_exception(self):
339+
df = DataFrame(np.arange(50).reshape((10, 5)))
340+
series = Series(np.arange(5))
341+
342+
with pytest.raises(ValueError, match=r"axis=0 or 1"):
343+
df.align(series)
344+
246345
def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
247346
aa, ab = a.align(
248347
b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis

pandas/tests/frame/test_arithmetic.py

+110
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,116 @@ def test_broadcast_multiindex(self, level):
722722

723723
tm.assert_frame_equal(result, expected)
724724

725+
def test_frame_multiindex_operations(self):
726+
# GH 43321
727+
df = DataFrame(
728+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
729+
index=MultiIndex.from_product(
730+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
731+
),
732+
)
733+
734+
series = Series(
735+
[0.4],
736+
index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
737+
)
738+
739+
expected = DataFrame(
740+
{2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
741+
index=MultiIndex.from_product(
742+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
743+
),
744+
)
745+
result = df.add(series, axis=0)
746+
747+
tm.assert_frame_equal(result, expected)
748+
749+
def test_frame_multiindex_operations_series_index_to_frame_index(self):
750+
# GH 43321
751+
df = DataFrame(
752+
{2010: [1], 2020: [3]},
753+
index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]),
754+
)
755+
756+
series = Series(
757+
[10.0, 20.0, 30.0],
758+
index=MultiIndex.from_product(
759+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
760+
),
761+
)
762+
763+
expected = DataFrame(
764+
{2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]},
765+
index=MultiIndex.from_product(
766+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
767+
),
768+
)
769+
result = df.add(series, axis=0)
770+
771+
tm.assert_frame_equal(result, expected)
772+
773+
def test_frame_multiindex_operations_no_align(self):
774+
df = DataFrame(
775+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
776+
index=MultiIndex.from_product(
777+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
778+
),
779+
)
780+
781+
series = Series(
782+
[0.4],
783+
index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]),
784+
)
785+
786+
expected = DataFrame(
787+
{2010: np.nan, 2020: np.nan},
788+
index=MultiIndex.from_tuples(
789+
[
790+
("a", "b", 0),
791+
("a", "b", 1),
792+
("a", "b", 2),
793+
("a", "c", np.nan),
794+
],
795+
names=["scen", "mod", "id"],
796+
),
797+
)
798+
result = df.add(series, axis=0)
799+
800+
tm.assert_frame_equal(result, expected)
801+
802+
def test_frame_multiindex_operations_part_align(self):
803+
df = DataFrame(
804+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
805+
index=MultiIndex.from_tuples(
806+
[
807+
("a", "b", 0),
808+
("a", "b", 1),
809+
("a", "c", 2),
810+
],
811+
names=["scen", "mod", "id"],
812+
),
813+
)
814+
815+
series = Series(
816+
[0.4],
817+
index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
818+
)
819+
820+
expected = DataFrame(
821+
{2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]},
822+
index=MultiIndex.from_tuples(
823+
[
824+
("a", "b", 0),
825+
("a", "b", 1),
826+
("a", "c", 2),
827+
],
828+
names=["scen", "mod", "id"],
829+
),
830+
)
831+
result = df.add(series, axis=0)
832+
833+
tm.assert_frame_equal(result, expected)
834+
725835

726836
class TestFrameArithmetic:
727837
def test_td64_op_nat_casting(self):

0 commit comments

Comments
 (0)