Skip to content

Commit 7d1e6ad

Browse files
BUG: Fix aligning a DataFrame with a Series with MultiIndex
When aligning a DataFrame to a Series, we use Series.reindex() to broadcast the Series data to the new index. That introduces NaNs whenever the rows of the new index are not identical to the existing ones, which is exactly what happens when the join introduces a new MultiIndex level. This patch instead uses the same approach as for aligning a Series to another Series. That means we have to replicate part of Series._reindex_indexer, as DataFrame does not have it.
1 parent aafa7a9 commit 7d1e6ad

File tree

4 files changed

+98
-19
lines changed

4 files changed

+98
-19
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ Reshaping
385385
^^^^^^^^^
386386
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
387387
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
388+
- Bug in :meth:`DataFrame.align` when aligning a :class:`DataFrame` with a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
388389
-
389390

390391
Sparse

pandas/core/generic.py

+50-19
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727

2828
import numpy as np
2929

30-
from pandas._config import config
30+
from pandas._config import (
31+
config,
32+
get_option,
33+
)
3134

3235
from pandas._libs import lib
3336
from pandas._libs.tslibs import (
@@ -113,6 +116,7 @@
113116
)
114117

115118
from pandas.core import (
119+
algorithms,
116120
arraylike,
117121
indexing,
118122
missing,
@@ -143,7 +147,10 @@
143147
BlockManager,
144148
SingleArrayManager,
145149
)
146-
from pandas.core.internals.construction import mgr_to_mgr
150+
from pandas.core.internals.construction import (
151+
dict_to_mgr,
152+
mgr_to_mgr,
153+
)
147154
from pandas.core.missing import find_valid_index
148155
from pandas.core.ops import align_method_FRAME
149156
from pandas.core.reshape.concat import concat
@@ -8974,10 +8981,14 @@ def _align_series(
89748981

89758982
is_series = isinstance(self, ABCSeries)
89768983

8984+
if (not is_series and axis is None) or axis not in [None, 0, 1]:
8985+
raise ValueError("Must specify axis=0 or 1")
8986+
8987+
if is_series and axis == 1:
8988+
raise ValueError("cannot align series to a series other than axis 0")
8989+
89778990
# series/series compat, other must always be a Series
8978-
if is_series:
8979-
if axis:
8980-
raise ValueError("cannot align series to a series other than axis 0")
8991+
if not axis:
89818992

89828993
# equal
89838994
if self.index.equals(other.index):
@@ -8987,26 +8998,46 @@ def _align_series(
89878998
other.index, how=join, level=level, return_indexers=True
89888999
)
89899000

8990-
left = self._reindex_indexer(join_index, lidx, copy)
9001+
if is_series:
9002+
left = self._reindex_indexer(join_index, lidx, copy)
9003+
elif join_index is None:
9004+
left = self.copy() if copy else self
9005+
else:
9006+
data = {
9007+
c: algorithms.take_nd(
9008+
self.__getitem__(c)._values,
9009+
lidx,
9010+
allow_fill=True,
9011+
fill_value=None,
9012+
)
9013+
for c in self.columns
9014+
}
9015+
left = self._constructor(
9016+
dict_to_mgr(
9017+
data,
9018+
join_index,
9019+
self.columns,
9020+
typ=get_option("mode.data_manager"),
9021+
copy=copy,
9022+
)
9023+
)
9024+
89919025
right = other._reindex_indexer(join_index, ridx, copy)
89929026

89939027
else:
9028+
89949029
# one has > 1 ndim
89959030
fdata = self._mgr
8996-
if axis in [0, 1]:
8997-
join_index = self.axes[axis]
8998-
lidx, ridx = None, None
8999-
if not join_index.equals(other.index):
9000-
join_index, lidx, ridx = join_index.join(
9001-
other.index, how=join, level=level, return_indexers=True
9002-
)
9003-
9004-
if lidx is not None:
9005-
bm_axis = self._get_block_manager_axis(axis)
9006-
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
9031+
join_index = self.axes[1]
9032+
lidx, ridx = None, None
9033+
if not join_index.equals(other.index):
9034+
join_index, lidx, ridx = join_index.join(
9035+
other.index, how=join, level=level, return_indexers=True
9036+
)
90079037

9008-
else:
9009-
raise ValueError("Must specify axis=0 or 1")
9038+
if lidx is not None:
9039+
bm_axis = self._get_block_manager_axis(1)
9040+
fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
90109041

90119042
if copy and fdata is self._mgr:
90129043
fdata = fdata.copy()

pandas/tests/frame/methods/test_align.py

+23
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,29 @@ def test_align_series_combinations(self):
243243
tm.assert_series_equal(res1, exp2)
244244
tm.assert_frame_equal(res2, exp1)
245245

246+
def test_multiindex_align_to_series_with_common_index_level(self):
247+
# GH-46001
248+
foo_index = Index([1, 2, 3], name="foo")
249+
bar_index = Index([1, 2], name="bar")
250+
251+
series = Series([1, 2], index=bar_index, name="foo_series")
252+
df = DataFrame(
253+
{"col": np.arange(6)},
254+
index=pd.MultiIndex.from_product([foo_index, bar_index]),
255+
)
256+
257+
expected = Series([1, 2] * 3, index=df.index, name="foo_series")
258+
_, result = df.align(series, axis=0)
259+
260+
tm.assert_series_equal(result, expected)
261+
262+
def test_missing_axis_specification_exception(self):
263+
df = DataFrame(np.arange(50).reshape((10, 5)))
264+
s = Series(np.arange(5))
265+
266+
with pytest.raises(ValueError, match=r"axis=0 or 1"):
267+
df.align(s)
268+
246269
def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
247270
aa, ab = a.align(
248271
b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis

pandas/tests/frame/test_arithmetic.py

+24
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,30 @@ def test_frame_single_columns_object_sum_axis_1():
10611061
tm.assert_series_equal(result, expected)
10621062

10631063

1064+
def test_frame_multi_index_operations():
1065+
# GH 43321
1066+
df = DataFrame(
1067+
{2010: [1, 2, 3], 2020: [3, 4, 5]},
1068+
index=MultiIndex.from_product(
1069+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1070+
),
1071+
)
1072+
1073+
s = Series(
1074+
[0.4],
1075+
index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
1076+
)
1077+
1078+
expected = DataFrame(
1079+
{2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
1080+
index=MultiIndex.from_product(
1081+
[["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
1082+
),
1083+
)
1084+
1085+
tm.assert_frame_equal(df.add(s, axis=0), expected)
1086+
1087+
10641088
# -------------------------------------------------------------------
10651089
# Unsorted
10661090
# These arithmetic tests were previously in other files, eventually

0 commit comments

Comments
 (0)