Skip to content

BUG: Fixed issue with bar plots not stacking correctly when 'stacked' and 'subplots' are used together #61340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ Period
Plotting
^^^^^^^^
- Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`)
- Bug in :meth:`DataFrame.plot.bar` where using ``subplots`` together with ``stacked=True`` caused bars to be stacked incorrectly (:issue:`61018`)
- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`)
- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`)
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
Expand Down
44 changes: 43 additions & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1928,6 +1928,27 @@ def _make_plot(self, fig: Figure) -> None:
K = self.nseries

data = self.data.fillna(0)

_stacked_subplots_ind: dict[int, int] = {}
_stacked_subplots_offsets = []

self.subplots: list[Any]
if self.subplots:
subplots_flag = 1
else:
subplots_flag = 0

if subplots_flag & self.stacked:
Comment on lines +1935 to +1941
Copy link
Member

@mroeschke mroeschke Apr 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.subplots: list[Any]
if self.subplots:
subplots_flag = 1
else:
subplots_flag = 0
if subplots_flag & self.stacked:
if self.subplots and self.stacked:

sub_range = range(len(self.subplots))
ss_temp = {
x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1
}
for k, v in ss_temp.items():
for x in v:
_stacked_subplots_ind.setdefault(int(x), k)

_stacked_subplots_offsets.append([0, 0])

Comment on lines +1942 to +1951
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
sub_range = range(len(self.subplots))
ss_temp = {
x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1
}
for k, v in ss_temp.items():
for x in v:
_stacked_subplots_ind.setdefault(int(x), k)
_stacked_subplots_offsets.append([0, 0])
for i, sub_plot in enumerate(self.subplots):
if len(sub_plot) <= 1:
continue
for plot in sub_plot:
_stacked_subplots_ind[int(plot)] = i
_stacked_subplots_offsets.append([0, 0])

for i, (label, y) in enumerate(self._iter_data(data=data)):
ax = self._get_ax(i)
kwds = self.kwds.copy()
Expand All @@ -1953,7 +1974,28 @@ def _make_plot(self, fig: Figure) -> None:
start = start + self._start_base

kwds["align"] = self._align
if self.subplots:

if i in _stacked_subplots_ind:
offset_index = _stacked_subplots_ind[i]
pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] # type:ignore[assignment]
mask = y >= 0
start = np.where(mask, pos_prior, neg_prior) + self._start_base
w = self.bar_width / 2
rect = self._plot(
ax,
self.ax_pos + w,
y,
self.bar_width,
start=start,
label=label,
log=self.log,
**kwds,
)
pos_new = pos_prior + np.where(mask, y, 0)
neg_new = neg_prior + np.where(mask, 0, y)
_stacked_subplots_offsets[offset_index] = [pos_new, neg_new]

elif self.subplots:
w = self.bar_width / 2
rect = self._plot(
ax,
Expand Down
126 changes: 126 additions & 0 deletions pandas/tests/plotting/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,3 +681,129 @@ def test_bar_plt_xaxis_intervalrange(self):
(a.get_text() == b.get_text())
for a, b in zip(s.plot.bar().get_xticklabels(), expected)
)


@pytest.fixture(scope="class")
def BSS_data():
    """Return five seeded pseudo-random integers drawn from ``[0, 100)``."""
    # A fixed seed keeps the bar heights identical on every run.
    rng = np.random.default_rng(3)
    return rng.integers(0, 100, 5)


@pytest.fixture(scope="class")
def BSS_df(BSS_data) -> DataFrame:
    """Build the four-column frame plotted by the stacked-subplot bar tests.

    ``A`` holds ``BSS_data`` and ``B`` holds it reversed; ``C`` and ``D`` are
    built from the scalars ``BSS_data[0]`` and ``BSS_data[-1]``.
    """
    first_value, last_value = BSS_data[0], BSS_data[-1]
    columns = {
        "A": BSS_data,
        "B": BSS_data[::-1],
        "C": first_value,
        "D": last_value,
    }
    return DataFrame(columns)
Comment on lines +686 to +696
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@pytest.fixture(scope="class")
def BSS_data():
return np.random.default_rng(3).integers(0, 100, 5)
@pytest.fixture(scope="class")
def BSS_df(BSS_data) -> DataFrame:
BSS_df = DataFrame(
{"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}
)
return BSS_df
@pytest.fixture
def BSS_data():
return np.random.default_rng(3).integers(0, 100, 5)
@pytest.fixture
def BSS_df(BSS_data) -> DataFrame:
return DataFrame(
{"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}
)

Also, what does the BSS abbreviation stand for? It would be better to use a clearer name.



def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division):
    """Collect the bar geometry drawn on each subplot axis.

    For every entry of ``subplot_division`` the axis at the same position in
    ``ax`` is searched for ``plt.Rectangle`` artists, and only rectangles whose
    height is a value contained in ``BSS_data`` are kept.

    Returns
    -------
    list of DataFrame
        One frame per subplot with the columns ``x_coord``, ``y_coord`` and
        ``height``, in the order the rectangles were found.
    """
    column_names = ["x_coord", "y_coord", "height"]
    frames = []

    for axis_index, _ in enumerate(subplot_division):
        rectangles = ax[axis_index].findobj(plt.Rectangle)
        # Match on height so that only rectangles representing data remain;
        # presumably this drops non-bar rectangles on the axis (TODO confirm).
        geometry = np.array(
            [
                (rect.get_x(), rect.get_y(), rect.get_height())
                for rect in rectangles
                if rect.get_height() in BSS_data
            ]
        )
        frames.append(DataFrame(data=geometry, columns=column_names))

    return frames


def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns):
    """Assert that the bars of one subplot are stacked end to end.

    Parameters
    ----------
    BSS_data : sequence
        Source data; only its length is used, as the number of bars that each
        plotted column contributes.
    BSS_df : DataFrame
        Frame that was plotted; the row-wise sum of ``subplot_columns`` is the
        expected total stack height.
    subplot_data_df : DataFrame
        Rectangle geometry for this subplot with ``y_coord`` and ``height``
        columns, ordered column by column (``len(BSS_data)`` rows per column).
    subplot_columns : sequence
        Columns drawn on this subplot, in stacking order.

    Raises
    ------
    AssertionError
        If the first segment does not start at ``y == 0``, if a segment does
        not start where the previous one ended, or if the final stack height
        differs from the expected total.
    """
    bars_per_column = len(BSS_data)
    n_columns = len(subplot_columns)

    # Split the flat rectangle table into one chunk per plotted column.
    subplot_sliced_by_source = [
        subplot_data_df.iloc[
            bars_per_column * i : bars_per_column * (i + 1)
        ].reset_index()
        for i in range(n_columns)
    ]
    expected_total_height = BSS_df.loc[:, subplot_columns].sum(axis=1)

    for i, sliced_df in enumerate(subplot_sliced_by_source):
        if i == 0:
            # Checks that the bar chart starts at y=0.  ``all`` must be
            # *called*: asserting the bound method itself is always truthy,
            # so the check would silently pass.
            assert (sliced_df["y_coord"] == 0).all()
            height_iter = sliced_df["y_coord"].add(sliced_df["height"])
        else:
            height_iter = height_iter + sliced_df["height"]

        if i + 1 == n_columns:
            # Checks final height matches what is expected
            tm.assert_series_equal(
                height_iter, expected_total_height, check_names=False, check_dtype=False
            )
        else:
            # Checks each preceding bar ends where the next one starts
            next_start_coord = subplot_sliced_by_source[i + 1]["y_coord"]
            tm.assert_series_equal(
                height_iter, next_start_coord, check_names=False, check_dtype=False
            )


class TestBarSubplotStacked:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you remove the test class? We're moving away from using this style

# GH Issue 61018
@pytest.mark.parametrize("columns_used", [["A", "B"], ["C", "D"], ["D", "A"]])
def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used):
    """Plot two columns stacked on a single subplot and check the stacking."""
    trimmed_df = BSS_df[columns_used]
    subplot_division = [columns_used]
    axes = trimmed_df.plot(subplots=subplot_division, kind="bar", stacked=True)
    frames = _BSS_xyheight_from_ax_helper(BSS_data, axes, subplot_division)
    # The helper returns one frame per subplot, in ``subplot_division`` order.
    for frame, columns in zip(frames, subplot_division):
        _BSS_subplot_checker(BSS_data, trimmed_df, frame, columns)

@pytest.mark.parametrize(
    "columns_used", [["A", "B", "C"], ["A", "C", "B"], ["D", "A", "C"]]
)
def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used):
    """Plot one two-column subplot plus one single-column subplot."""
    trimmed_df = BSS_df[columns_used]
    first, second, third = columns_used
    subplot_division = [(first, second), (third,)]
    axes = trimmed_df.plot(subplots=subplot_division, kind="bar", stacked=True)
    frames = _BSS_xyheight_from_ax_helper(BSS_data, axes, subplot_division)
    # The helper returns one frame per subplot, in ``subplot_division`` order.
    for frame, columns in zip(frames, subplot_division):
        _BSS_subplot_checker(BSS_data, trimmed_df, frame, columns)

@pytest.mark.parametrize(
    "subplot_division",
    [
        [("A", "B"), ("C", "D")],
        [("A", "D"), ("C", "B")],
        [("B", "C"), ("D", "A")],
        [("B", "D"), ("C", "A")],
    ],
)
def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division):
    """Plot two subplots that each stack two columns."""
    axes = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True)
    frames = _BSS_xyheight_from_ax_helper(BSS_data, axes, subplot_division)
    # The helper returns one frame per subplot, in ``subplot_division`` order.
    for frame, columns in zip(frames, subplot_division):
        _BSS_subplot_checker(BSS_data, BSS_df, frame, columns)

@pytest.mark.parametrize(
    "subplot_division",
    [[("A", "B", "C")], [("A", "D", "B")], [("C", "A", "D")], [("D", "C", "A")]],
)
def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division):
    """Plot with a subplot grouping that stacks three columns together."""
    axes = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True)
    frames = _BSS_xyheight_from_ax_helper(BSS_data, axes, subplot_division)
    # The helper returns one frame per entry of ``subplot_division``.
    for frame, columns in zip(frames, subplot_division):
        _BSS_subplot_checker(BSS_data, BSS_df, frame, columns)
Loading