diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index f76d94036c6d8..e9c63b2999f1d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -795,6 +795,7 @@ Period
 Plotting
 ^^^^^^^^
 - Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`)
+- Bug in :meth:`DataFrame.plot.bar` where bars were stacked incorrectly when ``stacked=True`` was used together with ``subplots`` (:issue:`61018`)
 - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`)
 - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`)
 - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index a217ee8a86a16..dc931e26ca660 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1928,6 +1928,26 @@ def _make_plot(self, fig: Figure) -> None:
         K = self.nseries
 
         data = self.data.fillna(0)
+
+        # Column position -> slot in ``_stacked_subplots_offsets`` for columns
+        # that share an axes with at least one other column.
+        _stacked_subplots_ind: dict[int, int] = {}
+        # One running [positive, negative] stack height pair per shared axes.
+        _stacked_subplots_offsets = []
+
+        # ``subplots`` may be a plain bool; only an explicit grouping can put
+        # several columns on one axes, so only that case needs stacking state.
+        if self.stacked and not isinstance(self.subplots, bool):
+            for group in self.subplots:
+                if len(group) <= 1:
+                    continue
+                # Index by slot, not by position in ``self.subplots``: groups
+                # with a single column get no entry in the offsets list.
+                slot = len(_stacked_subplots_offsets)
+                for col in group:
+                    _stacked_subplots_ind.setdefault(int(col), slot)
+                _stacked_subplots_offsets.append([0, 0])
+
         for i, (label, y) in enumerate(self._iter_data(data=data)):
             ax = self._get_ax(i)
             kwds = self.kwds.copy()
@@ -1953,7 +1973,31 @@ def _make_plot(self, fig: Figure) -> None:
             start = start + self._start_base
 
             kwds["align"] = self._align
-            if self.subplots:
+
+            if i in _stacked_subplots_ind:
+                # Column shares its axes with others: start each bar where the
+                # previous column on this axes ended (by sign) and record the
+                # new running heights.
+                offset_index = _stacked_subplots_ind[i]
+                pos_prior, neg_prior = _stacked_subplots_offsets[offset_index]  # type:ignore[assignment]
+                mask = y >= 0
+                start = np.where(mask, pos_prior, neg_prior) + self._start_base
+                w = self.bar_width / 2
+                rect = self._plot(
+                    ax,
+                    self.ax_pos + w,
+                    y,
+                    self.bar_width,
+                    start=start,
+                    label=label,
+                    log=self.log,
+                    **kwds,
+                )
+                pos_new = pos_prior + np.where(mask, y, 0)
+                neg_new = neg_prior + np.where(mask, 0, y)
+                _stacked_subplots_offsets[offset_index] = [pos_new, neg_new]
+
+            elif self.subplots:
                 w = self.bar_width / 2
                 rect = self._plot(
                     ax,
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index 43e1255404784..f97f3a9a2ff95 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -681,3 +681,129 @@ def test_bar_plt_xaxis_intervalrange(self):
             (a.get_text() == b.get_text())
             for a, b in zip(s.plot.bar().get_xticklabels(), expected)
         )
+
+
+@pytest.fixture(scope="class")
+def BSS_data():
+    return np.random.default_rng(3).integers(0, 100, 5)
+
+
+@pytest.fixture(scope="class")
+def BSS_df(BSS_data) -> DataFrame:
+    BSS_df = DataFrame(
+        {"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}
+    )
+    return BSS_df
+
+
+def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division):
+    subplot_data_df_list = []
+
+    # get xy and height of squares representing data, separated by subplots
+    for i in range(len(subplot_division)):
+        subplot_data = np.array(
+            [
+                (x.get_x(), x.get_y(), x.get_height())
+                for x in ax[i].findobj(plt.Rectangle)
+                if x.get_height() in BSS_data
+            ]
+        )
+        subplot_data_df_list.append(
+            DataFrame(data=subplot_data, columns=["x_coord", "y_coord", "height"])
+        )
+
+    return subplot_data_df_list
+
+
+def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns):
+    subplot_sliced_by_source = [
+        subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i + 1)].reset_index()
+        for i in range(len(subplot_columns))
+    ]
+    expected_total_height = BSS_df.loc[:, subplot_columns].sum(axis=1)
+
+    for i in range(len(subplot_columns)):
+        sliced_df = subplot_sliced_by_source[i]
+        if i == 0:
+            # Checks that the bar chart starts y=0
+            assert (sliced_df["y_coord"] == 0).all()
+            height_iter = sliced_df["y_coord"].add(sliced_df["height"])
+        else:
+            height_iter = height_iter + sliced_df["height"]
+
+        if i + 1 == len(subplot_columns):
+            # Checks final height matches what is expected
+            tm.assert_series_equal(
+                height_iter, expected_total_height, check_names=False, check_dtype=False
+            )
+
+        else:
+            # Checks each preceding bar ends where the next one starts
+            next_start_coord = subplot_sliced_by_source[i + 1]["y_coord"]
+            tm.assert_series_equal(
+                height_iter, next_start_coord, check_names=False, check_dtype=False
+            )
+
+
+class TestBarSubplotStacked:
+    # GH Issue 61018
+    @pytest.mark.parametrize("columns_used", [["A", "B"], ["C", "D"], ["D", "A"]])
+    def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used):
+        BSS_df_trimmed = BSS_df[columns_used]
+        subplot_division = [columns_used]
+        ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True)
+        subplot_data_df_list = _BSS_xyheight_from_ax_helper(
+            BSS_data, ax, subplot_division
+        )
+        for i in range(len(subplot_data_df_list)):
+            _BSS_subplot_checker(
+                BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]
+            )
+
+    @pytest.mark.parametrize(
+        "columns_used", [["A", "B", "C"], ["A", "C", "B"], ["D", "A", "C"]]
+    )
+    def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used):
+        BSS_df_trimmed = BSS_df[columns_used]
+        subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)]
+        ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True)
+        subplot_data_df_list = _BSS_xyheight_from_ax_helper(
+            BSS_data, ax, subplot_division
+        )
+        for i in range(len(subplot_data_df_list)):
+            _BSS_subplot_checker(
+                BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]
+            )
+
+    @pytest.mark.parametrize(
+        "subplot_division",
+        [
+            [("A", "B"), ("C", "D")],
+            [("A", "D"), ("C", "B")],
+            [("B", "C"), ("D", "A")],
+            [("B", "D"), ("C", "A")],
+        ],
+    )
+    def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division):
+        ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True)
+        subplot_data_df_list = _BSS_xyheight_from_ax_helper(
+            BSS_data, ax, subplot_division
+        )
+        for i in range(len(subplot_data_df_list)):
+            _BSS_subplot_checker(
+                BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]
+            )
+
+    @pytest.mark.parametrize(
+        "subplot_division",
+        [[("A", "B", "C")], [("A", "D", "B")], [("C", "A", "D")], [("D", "C", "A")]],
+    )
+    def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division):
+        ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True)
+        subplot_data_df_list = _BSS_xyheight_from_ax_helper(
+            BSS_data, ax, subplot_division
+        )
+        for i in range(len(subplot_data_df_list)):
+            _BSS_subplot_checker(
+                BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]
+            )