From 8440ddef16cb08f76d04c0ca21d057a67a21fbbe Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 21 Mar 2025 21:06:33 +0000 Subject: [PATCH 01/13] test case for subplot stacking --- pandas/tests/plotting/test_common.py | 33 +++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index 20daf59356248..c4d74fd30a9c8 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -1,6 +1,7 @@ import pytest - +import numpy as np from pandas import DataFrame +from pandas import unique from pandas.tests.plotting.common import ( _check_plot_works, _check_ticks_props, @@ -58,3 +59,33 @@ def test_colorbar_layout(self): fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right") DataFrame(x).plot(ax=axes["C"]) + + def test_bar_subplot_stacking(self): + #GH Issue 61018 + #Extracts height and location data + test_data = np.random.default_rng(3).integers(0,100,5) + df = DataFrame({"a": test_data, "b": test_data[::-1]}) + ax = _check_plot_works(df.plot, subplots= [('a','b')], kind="bar", stacked=True) + + #get xy and height of squares that represent the data graphed from the df + #we would expect the height value of A to be reflected in the Y coord of B + data_from_plot_mat = [(x.get_x(), x.get_y(), x.get_height()) for x in ax[0].findobj(plt.Rectangle) if x.get_height() in test_data] + data_from_plot_df = DataFrame(data = data_from_plot_mat, columns = ["x_coord", "y_coord", "height"]) + unique_x_loc = unique(data_from_plot_df["x_coord"]) + + plot_a_df = data_from_plot_df.iloc[:len(test_data)] + plot_b_df = data_from_plot_df.iloc[len(test_data):].reset_index() + total_bar_height = plot_a_df["height"].add(plot_b_df["height"]) + + print(test_data + test_data[::-1]) + + #check number of bars matches the number of data plotted + assert len(unique_x_loc) == len(test_data) + + #checks that the first set of bars are the correct height and that 
the second one starts at the top of the first, additional checks the combined height of the bars are correct + assert (plot_a_df["height"] == test_data).all() + assert (plot_b_df["y_coord"] == test_data).all() + assert (total_bar_height == test_data + test_data[::-1]).all() + + + From 350a7ecbbdb423ecddf3bcff02a33732328b2412 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 21 Mar 2025 22:19:19 +0000 Subject: [PATCH 02/13] Removed overlooked print statement --- pandas/tests/plotting/test_common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index c4d74fd30a9c8..372adba9bce8f 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -77,8 +77,6 @@ def test_bar_subplot_stacking(self): plot_b_df = data_from_plot_df.iloc[len(test_data):].reset_index() total_bar_height = plot_a_df["height"].add(plot_b_df["height"]) - print(test_data + test_data[::-1]) - #check number of bars matches the number of data plotted assert len(unique_x_loc) == len(test_data) From b27d1acd88531a5f08340f69f1a80dcc00c2a19c Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sun, 30 Mar 2025 03:20:18 -0500 Subject: [PATCH 03/13] Updated test to check other subplot in figure --- pandas/tests/plotting/test_common.py | 47 ++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index 372adba9bce8f..ecbb7420483f8 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -62,28 +62,43 @@ def test_colorbar_layout(self): def test_bar_subplot_stacking(self): #GH Issue 61018 - #Extracts height and location data test_data = np.random.default_rng(3).integers(0,100,5) - df = DataFrame({"a": test_data, "b": test_data[::-1]}) - ax = _check_plot_works(df.plot, subplots= [('a','b')], kind="bar", stacked=True) + df = DataFrame({"A": test_data, "B": 
test_data[::-1], "C": test_data[0]}) + ax = df.plot(subplots= [('A','B')], kind="bar", stacked=True) - #get xy and height of squares that represent the data graphed from the df - #we would expect the height value of A to be reflected in the Y coord of B - data_from_plot_mat = [(x.get_x(), x.get_y(), x.get_height()) for x in ax[0].findobj(plt.Rectangle) if x.get_height() in test_data] - data_from_plot_df = DataFrame(data = data_from_plot_mat, columns = ["x_coord", "y_coord", "height"]) - unique_x_loc = unique(data_from_plot_df["x_coord"]) - - plot_a_df = data_from_plot_df.iloc[:len(test_data)] - plot_b_df = data_from_plot_df.iloc[len(test_data):].reset_index() - total_bar_height = plot_a_df["height"].add(plot_b_df["height"]) + #finds all the rectangles that represent the values from both subplots + data_from_subplots = [[(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in test_data] for i in range(0,2)] + #get xy and height of squares that represent the data graphed from the df + #we would expect the height value of A to be reflected in the Y coord of B in subplot 1 + subplot_data_df_list = [] + unique_x_loc_list = [] + for i in range(0,len(data_from_subplots)): + subplot_data_df= DataFrame(data = data_from_subplots[i], columns = ["x_coord", "y_coord", "height"]) + unique_x_loc = unique(subplot_data_df["x_coord"]) + + subplot_data_df_list.append(subplot_data_df) + unique_x_loc_list.append(unique_x_loc) + + #Checks subplot 1 + plot_A_df = subplot_data_df_list[0].iloc[:len(test_data)] + plot_B_df = subplot_data_df_list[0].iloc[len(test_data):].reset_index() + total_bar_height = plot_A_df["height"].add(plot_B_df["height"]) #check number of bars matches the number of data plotted - assert len(unique_x_loc) == len(test_data) - + assert len(unique_x_loc_list[0]) == len(test_data) #checks that the first set of bars are the correct height and that the second one starts at the top of the first, additional checks the combined 
height of the bars are correct - assert (plot_a_df["height"] == test_data).all() - assert (plot_b_df["y_coord"] == test_data).all() + assert (plot_A_df["height"] == test_data).all() + assert (plot_B_df["y_coord"] == test_data).all() assert (total_bar_height == test_data + test_data[::-1]).all() + #Checks subplot 2 + plot_C_df = subplot_data_df_list[1].iloc[:len(test_data)] + #check number of bars matches the number of data plotted + assert len(unique_x_loc_list[1]) == len(test_data) + #checks that all the bars start at zero and are the correct height + assert (plot_C_df["height"] == test_data[0]).all() + assert (plot_C_df["y_coord"] == 0).all() + + From 1fe342af043b76c674adb73d84fe946f30133d83 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 8 Apr 2025 00:44:25 +0000 Subject: [PATCH 04/13] Updated test cases to include more subplot stacking possibilities --- pandas/tests/plotting/test_common.py | 40 -------------- pandas/tests/plotting/test_misc.py | 81 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 40 deletions(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index ecbb7420483f8..ee7b57ee62b3b 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -1,7 +1,6 @@ import pytest import numpy as np from pandas import DataFrame -from pandas import unique from pandas.tests.plotting.common import ( _check_plot_works, _check_ticks_props, @@ -59,45 +58,6 @@ def test_colorbar_layout(self): fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right") DataFrame(x).plot(ax=axes["C"]) - - def test_bar_subplot_stacking(self): - #GH Issue 61018 - test_data = np.random.default_rng(3).integers(0,100,5) - df = DataFrame({"A": test_data, "B": test_data[::-1], "C": test_data[0]}) - ax = df.plot(subplots= [('A','B')], kind="bar", stacked=True) - - #finds all the rectangles that represent the values from both subplots - data_from_subplots = [[(x.get_x(), x.get_y(), x.get_height()) for 
x in ax[i].findobj(plt.Rectangle) if x.get_height() in test_data] for i in range(0,2)] - - #get xy and height of squares that represent the data graphed from the df - #we would expect the height value of A to be reflected in the Y coord of B in subplot 1 - subplot_data_df_list = [] - unique_x_loc_list = [] - for i in range(0,len(data_from_subplots)): - subplot_data_df= DataFrame(data = data_from_subplots[i], columns = ["x_coord", "y_coord", "height"]) - unique_x_loc = unique(subplot_data_df["x_coord"]) - - subplot_data_df_list.append(subplot_data_df) - unique_x_loc_list.append(unique_x_loc) - - #Checks subplot 1 - plot_A_df = subplot_data_df_list[0].iloc[:len(test_data)] - plot_B_df = subplot_data_df_list[0].iloc[len(test_data):].reset_index() - total_bar_height = plot_A_df["height"].add(plot_B_df["height"]) - #check number of bars matches the number of data plotted - assert len(unique_x_loc_list[0]) == len(test_data) - #checks that the first set of bars are the correct height and that the second one starts at the top of the first, additional checks the combined height of the bars are correct - assert (plot_A_df["height"] == test_data).all() - assert (plot_B_df["y_coord"] == test_data).all() - assert (total_bar_height == test_data + test_data[::-1]).all() - - #Checks subplot 2 - plot_C_df = subplot_data_df_list[1].iloc[:len(test_data)] - #check number of bars matches the number of data plotted - assert len(unique_x_loc_list[1]) == len(test_data) - #checks that all the bars start at zero and are the correct height - assert (plot_C_df["height"] == test_data[0]).all() - assert (plot_C_df["y_coord"] == 0).all() diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 43e1255404784..d878c9200c835 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -681,3 +681,84 @@ def test_bar_plt_xaxis_intervalrange(self): (a.get_text() == b.get_text()) for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) 
+ +@pytest.fixture(scope="class") +def BSS_data() -> np.array: + yield np.random.default_rng(3).integers(0,100,5) + +@pytest.fixture(scope="class") +def BSS_df(BSS_data) -> DataFrame: + BSS_df = DataFrame({"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}) + return BSS_df + +def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): + subplot_data_df_list = [] + + # get xy and height of squares that represent the data graphed from the df, seperated by subplots + for i in range(len(subplot_division)): + subplot_data = np.array([(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in BSS_data]) + subplot_data_df_list.append(DataFrame(data = subplot_data, columns = ["x_coord", "y_coord", "height"])) + + return subplot_data_df_list + +def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): + assert_flag = 0 + subplot_sliced_by_source = [subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i+1)].reset_index() for i in range(0, len(subplot_columns))] + expected_total_height = BSS_df.loc[:,subplot_columns].sum(axis=1) + + for i in range(len(subplot_columns)): + sliced_df = subplot_sliced_by_source[i] + if i == 0: + #Checks that the bar chart starts y=0 + assert (sliced_df["y_coord"] == 0).all + height_iter = sliced_df["y_coord"].add(sliced_df["height"]) + else: + height_iter = height_iter + sliced_df["height"] + + if i+1 == len(subplot_columns): + #Checks final height matches what is expected + tm.assert_series_equal(height_iter, expected_total_height, check_names = False, check_dtype= False) + + else: + #Checks each preceding bar ends where the next one starts + next_start_coord = subplot_sliced_by_source[i+1]["y_coord"] + tm.assert_series_equal(height_iter, next_start_coord, check_names = False, check_dtype= False) + +class TestBarSubplotStacked: + #GH Issue 61018 + def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): + columns_used = ["A", "B"] + 
BSS_df_trimmed = BSS_df[columns_used] + subplot_division = [columns_used] + ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("1s1d.png") + + + def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): + columns_used = ["A", "B", "C"] + BSS_df_trimmed = BSS_df[columns_used] + subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] + ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("2s1d.png") + + def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): + subplot_division = [('A', 'D'), ('C', 'B')] + ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("2s2d.png") + + def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df): + subplot_division = [('A', 'D', 'C')] + ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("2s1t.png") \ No newline at end of file From de55789ca419656bd555068882189a6d824a738c Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 8 Apr 2025 00:53:10 +0000 Subject: [PATCH 05/13] 
removed savefig() left in test cases --- pandas/tests/plotting/test_misc.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index d878c9200c835..e8dd244bd9e63 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -734,7 +734,6 @@ def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("1s1d.png") def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): @@ -745,7 +744,6 @@ def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("2s1d.png") def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): subplot_division = [('A', 'D'), ('C', 'B')] @@ -753,12 +751,10 @@ def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("2s2d.png") def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df): subplot_division = [('A', 'D', 'C')] ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("2s1t.png") \ No newline at end of file + _BSS_subplot_checker(BSS_data, BSS_df, 
subplot_data_df_list[i], subplot_division[i]) \ No newline at end of file From e8a6f91f992aa2e38fb1ef8af4cfff26d56eb035 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 22 Apr 2025 22:47:22 +0000 Subject: [PATCH 06/13] Updated test cases to test more arrangements --- pandas/tests/plotting/test_misc.py | 36 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index e8dd244bd9e63..10400870ecb80 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -726,8 +726,11 @@ def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): class TestBarSubplotStacked: #GH Issue 61018 - def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): - columns_used = ["A", "B"] + @pytest.mark.parametrize("columns_used",[["A", "B"], + ["C", "D"], + ["D", "A"] + ]) + def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): BSS_df_trimmed = BSS_df[columns_used] subplot_division = [columns_used] ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) @@ -735,25 +738,36 @@ def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - - def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): - columns_used = ["A", "B", "C"] - BSS_df_trimmed = BSS_df[columns_used] + @pytest.mark.parametrize("columns_used",[["A", "B", "C"], + ["A", "C", "B"], + ["D", "A", "C"] + + ]) + def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): + BSS_df_trimmed = BSS_df[columns_used] subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in 
range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): - subplot_division = [('A', 'D'), ('C', 'B')] + @pytest.mark.parametrize("subplot_division", [[("A", "B"), ("C", "D")], + [("A", "D"), ("C", "B")], + [("B", "C"), ("D", "A")], + [("B", "D"), ("C", "A")] + ]) + def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division): ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - - def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df): - subplot_division = [('A', 'D', 'C')] + + @pytest.mark.parametrize("subplot_division", [[("A", "B", "C")], + [("A", "D", "B")], + [("C", "A", "D")], + [("D", "C", "A")] + ]) + def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division): ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): From 0d9f5daf1d422f8f0d61a1e3dbe4e7ee3b722ae5 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 22 Apr 2025 23:58:13 +0000 Subject: [PATCH 07/13] Completed function fix (order of subplot input does not matter, need clarification if it matters) --- pandas/plotting/_matplotlib/core.py | 41 ++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1035150302d2c..6192b6c611b6e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1921,6 +1921,19 @@ def _make_plot(self, fig: Figure) -> None: K = self.nseries data = self.data.fillna(0) + + 
_stacked_subplots_ind_dict = {} + _stacked_subplots_offsets = [] + + if self.subplots != False & self.stacked: + # _stacked_subplots_list = [sorted(x) for x in self.subplots if len(x) > 1] + temp_ss_dict = {x: self.subplots[x] for x in range(len(self.subplots)) if len(self.subplots[x]) > 1} + for k, v in temp_ss_dict.items(): + for x in v: + _stacked_subplots_ind_dict.setdefault(int(x), k) + + _stacked_subplots_offsets.append([0,0]) + for i, (label, y) in enumerate(self._iter_data(data=data)): ax = self._get_ax(i) kwds = self.kwds.copy() @@ -1946,7 +1959,33 @@ def _make_plot(self, fig: Figure) -> None: start = start + self._start_base kwds["align"] = self._align - if self.subplots: + + try: + offset_index = _stacked_subplots_ind_dict[i] + _stacked_subplots_flag = 1 + except: + _stacked_subplots_flag = 0 + + if _stacked_subplots_flag: + mask = y >= 0 + pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + pos_new = pos_prior + np.where(mask, y, 0) + neg_new = neg_prior + np.where(mask, 0, y) + _stacked_subplots_offsets[offset_index] = [pos_new, neg_new] + + elif self.subplots: w = self.bar_width / 2 rect = self._plot( ax, From b3560288f5b69df1f013a3072775d3b930815ae4 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 00:15:05 +0000 Subject: [PATCH 08/13] appeasing the great pre-commit formatter --- pandas/plotting/_matplotlib/core.py | 28 +++--- pandas/tests/plotting/test_common.py | 6 +- pandas/tests/plotting/test_misc.py | 137 +++++++++++++++++---------- 3 files changed, 100 insertions(+), 71 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 6192b6c611b6e..6462e77c0ffad 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ 
-1922,17 +1922,20 @@ def _make_plot(self, fig: Figure) -> None: data = self.data.fillna(0) - _stacked_subplots_ind_dict = {} - _stacked_subplots_offsets = [] + _stacked_subplots_ind_dict = {} + _stacked_subplots_offsets = [] if self.subplots != False & self.stacked: - # _stacked_subplots_list = [sorted(x) for x in self.subplots if len(x) > 1] - temp_ss_dict = {x: self.subplots[x] for x in range(len(self.subplots)) if len(self.subplots[x]) > 1} + temp_ss_dict = { + x: self.subplots[x] + for x in range(len(self.subplots)) + if len(self.subplots[x]) > 1 + } for k, v in temp_ss_dict.items(): for x in v: _stacked_subplots_ind_dict.setdefault(int(x), k) - - _stacked_subplots_offsets.append([0,0]) + + _stacked_subplots_offsets.append([0, 0]) for i, (label, y) in enumerate(self._iter_data(data=data)): ax = self._get_ax(i) @@ -1959,16 +1962,11 @@ def _make_plot(self, fig: Figure) -> None: start = start + self._start_base kwds["align"] = self._align - - try: - offset_index = _stacked_subplots_ind_dict[i] - _stacked_subplots_flag = 1 - except: - _stacked_subplots_flag = 0 - if _stacked_subplots_flag: - mask = y >= 0 + if i in _stacked_subplots_ind_dict: + offset_index = _stacked_subplots_ind_dict[i] pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] + mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base w = self.bar_width / 2 rect = self._plot( @@ -1984,7 +1982,7 @@ def _make_plot(self, fig: Figure) -> None: pos_new = pos_prior + np.where(mask, y, 0) neg_new = neg_prior + np.where(mask, 0, y) _stacked_subplots_offsets[offset_index] = [pos_new, neg_new] - + elif self.subplots: w = self.bar_width / 2 rect = self._plot( diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index ee7b57ee62b3b..20daf59356248 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -1,5 +1,5 @@ import pytest -import numpy as np + from pandas import DataFrame from 
pandas.tests.plotting.common import ( _check_plot_works, @@ -58,7 +58,3 @@ def test_colorbar_layout(self): fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right") DataFrame(x).plot(ax=axes["C"]) - - - - diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 10400870ecb80..4c0c77152f0f7 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -682,93 +682,128 @@ def test_bar_plt_xaxis_intervalrange(self): for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) + @pytest.fixture(scope="class") def BSS_data() -> np.array: - yield np.random.default_rng(3).integers(0,100,5) + return np.random.default_rng(3).integers(0, 100, 5) + @pytest.fixture(scope="class") def BSS_df(BSS_data) -> DataFrame: - BSS_df = DataFrame({"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}) + BSS_df = DataFrame( + {"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]} + ) return BSS_df + def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): subplot_data_df_list = [] - # get xy and height of squares that represent the data graphed from the df, seperated by subplots + # get xy and height of squares representing data, separated by subplots for i in range(len(subplot_division)): - subplot_data = np.array([(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in BSS_data]) - subplot_data_df_list.append(DataFrame(data = subplot_data, columns = ["x_coord", "y_coord", "height"])) + subplot_data = np.array( + [ + (x.get_x(), x.get_y(), x.get_height()) + for x in ax[i].findobj(plt.Rectangle) + if x.get_height() in BSS_data + ] + ) + subplot_data_df_list.append( + DataFrame(data=subplot_data, columns=["x_coord", "y_coord", "height"]) + ) return subplot_data_df_list + def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): - assert_flag = 0 - subplot_sliced_by_source = [subplot_data_df.iloc[len(BSS_data) * i : 
len(BSS_data) * (i+1)].reset_index() for i in range(0, len(subplot_columns))] - expected_total_height = BSS_df.loc[:,subplot_columns].sum(axis=1) - + subplot_sliced_by_source = [ + subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i + 1)].reset_index() + for i in range(len(subplot_columns)) + ] + expected_total_height = BSS_df.loc[:, subplot_columns].sum(axis=1) + for i in range(len(subplot_columns)): sliced_df = subplot_sliced_by_source[i] if i == 0: - #Checks that the bar chart starts y=0 + # Checks that the bar chart starts y=0 assert (sliced_df["y_coord"] == 0).all height_iter = sliced_df["y_coord"].add(sliced_df["height"]) else: height_iter = height_iter + sliced_df["height"] - if i+1 == len(subplot_columns): - #Checks final height matches what is expected - tm.assert_series_equal(height_iter, expected_total_height, check_names = False, check_dtype= False) - + if i + 1 == len(subplot_columns): + # Checks final height matches what is expected + tm.assert_series_equal( + height_iter, expected_total_height, check_names=False, check_dtype=False + ) + else: - #Checks each preceding bar ends where the next one starts - next_start_coord = subplot_sliced_by_source[i+1]["y_coord"] - tm.assert_series_equal(height_iter, next_start_coord, check_names = False, check_dtype= False) + # Checks each preceding bar ends where the next one starts + next_start_coord = subplot_sliced_by_source[i + 1]["y_coord"] + tm.assert_series_equal( + height_iter, next_start_coord, check_names=False, check_dtype=False + ) + class TestBarSubplotStacked: - #GH Issue 61018 - @pytest.mark.parametrize("columns_used",[["A", "B"], - ["C", "D"], - ["D", "A"] - ]) + # GH Issue 61018 + @pytest.mark.parametrize("columns_used", [["A", "B"], ["C", "D"], ["D", "A"]]) def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): BSS_df_trimmed = BSS_df[columns_used] subplot_division = [columns_used] - ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) - 
subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + _BSS_subplot_checker( + BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i] + ) - @pytest.mark.parametrize("columns_used",[["A", "B", "C"], - ["A", "C", "B"], - ["D", "A", "C"] - - ]) + @pytest.mark.parametrize( + "columns_used", [["A", "B", "C"], ["A", "C", "B"], ["D", "A", "C"]] + ) def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): - BSS_df_trimmed = BSS_df[columns_used] + BSS_df_trimmed = BSS_df[columns_used] subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] - ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + _BSS_subplot_checker( + BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i] + ) - @pytest.mark.parametrize("subplot_division", [[("A", "B"), ("C", "D")], - [("A", "D"), ("C", "B")], - [("B", "C"), ("D", "A")], - [("B", "D"), ("C", "A")] - ]) + @pytest.mark.parametrize( + "subplot_division", + [ + [("A", "B"), ("C", "D")], + [("A", "D"), ("C", "B")], + [("B", "C"), ("D", "A")], + [("B", "D"), ("C", "A")], + ], + ) def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division): - ax = BSS_df.plot(subplots = subplot_division, kind="bar", 
stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - - @pytest.mark.parametrize("subplot_division", [[("A", "B", "C")], - [("A", "D", "B")], - [("C", "A", "D")], - [("D", "C", "A")] - ]) + _BSS_subplot_checker( + BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i] + ) + + @pytest.mark.parametrize( + "subplot_division", + [[("A", "B", "C")], [("A", "D", "B")], [("C", "A", "D")], [("D", "C", "A")]], + ) def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division): - ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) \ No newline at end of file + _BSS_subplot_checker( + BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i] + ) From 4cc2905dac614a30409a5ed8343d1e3c0d08a0fa Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 00:50:42 +0000 Subject: [PATCH 09/13] Updated whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f76d94036c6d8..e9c63b2999f1d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -795,6 +795,7 @@ Period Plotting ^^^^^^^^ - Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`) +- 
Bug in :meth:`DataFrame.plot.bar` when ``subplots`` and ``stacked=True`` are used in conjunction which causes incorrect stacking. (:issue:`61018`) - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) From 99746323168da2dfeb5948ea7a46c595a7ba967f Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 04:49:15 +0000 Subject: [PATCH 10/13] Docstring adjustment --- pandas/plotting/_matplotlib/core.py | 17 ++++++++--------- pandas/tests/plotting/test_misc.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0c0542503b377..f9cdaae50f069 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1929,18 +1929,17 @@ def _make_plot(self, fig: Figure) -> None: data = self.data.fillna(0) - _stacked_subplots_ind_dict = {} + _stacked_subplots_ind = {} _stacked_subplots_offsets = [] if self.subplots != False & self.stacked: - temp_ss_dict = { - x: self.subplots[x] - for x in range(len(self.subplots)) - if len(self.subplots[x]) > 1 + sub_range = range(len(self.subplots)) + ss_temp = { + x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 } - for k, v in temp_ss_dict.items(): + for k, v in ss_temp.items(): for x in v: - _stacked_subplots_ind_dict.setdefault(int(x), k) + _stacked_subplots_ind.setdefault(int(x), k) _stacked_subplots_offsets.append([0, 0]) @@ -1970,8 +1969,8 @@ def _make_plot(self, fig: Figure) -> None: kwds["align"] = self._align - if i in _stacked_subplots_ind_dict: - offset_index = 
_stacked_subplots_ind_dict[i] + if i in _stacked_subplots_ind: + offset_index = _stacked_subplots_ind[i] pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 4c0c77152f0f7..f97f3a9a2ff95 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -684,7 +684,7 @@ def test_bar_plt_xaxis_intervalrange(self): @pytest.fixture(scope="class") -def BSS_data() -> np.array: +def BSS_data(): return np.random.default_rng(3).integers(0, 100, 5) From ddd331175dfaef3c0f4c4f9bea21952a307e9ed6 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 05:12:42 +0000 Subject: [PATCH 11/13] Moved self.subplot check to a separate bool --- pandas/plotting/_matplotlib/core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f9cdaae50f069..7ebc8948b91b5 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1932,7 +1932,12 @@ def _make_plot(self, fig: Figure) -> None: _stacked_subplots_ind = {} _stacked_subplots_offsets = [] - if self.subplots != False & self.stacked: + if self.subplots: + subplots_status = 1 + else: + subplots_status = 0 + + if subplots_status & self.stacked: sub_range = range(len(self.subplots)) ss_temp = { x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 From df51168837626684ccbf720e8090ac39e0f92fff Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 06:25:54 +0000 Subject: [PATCH 12/13] Added ignore where mypy thinks self.subplots is a bool --- pandas/plotting/_matplotlib/core.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7ebc8948b91b5..618ecee1fdb09 100644 --- 
a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1929,18 +1929,16 @@ def _make_plot(self, fig: Figure) -> None: data = self.data.fillna(0) - _stacked_subplots_ind = {} + _stacked_subplots_ind: dict[int, int] = {} _stacked_subplots_offsets = [] - if self.subplots: - subplots_status = 1 - else: - subplots_status = 0 - - if subplots_status & self.stacked: - sub_range = range(len(self.subplots)) + if self.subplots != False & self.stacked: + sub_range = range(len(self.subplots)) # type:ignore[arg-type] ss_temp = { - x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 + # mypy thinks self.subplots is a bool :( + x: self.subplots[x] # type:ignore[index] + for x in sub_range + if len(self.subplots[x]) > 1 # type:ignore[index] } for k, v in ss_temp.items(): for x in v: @@ -1976,7 +1974,7 @@ def _make_plot(self, fig: Figure) -> None: if i in _stacked_subplots_ind: offset_index = _stacked_subplots_ind[i] - pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] + pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] # type:ignore[assignment] mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base w = self.bar_width / 2 From bfb1dd12d6ddc895331e692801dc5d185bcca4a4 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 06:59:08 +0000 Subject: [PATCH 13/13] Actually addressed mypy typing --- pandas/plotting/_matplotlib/core.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 618ecee1fdb09..dc931e26ca660 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1932,13 +1932,16 @@ def _make_plot(self, fig: Figure) -> None: _stacked_subplots_ind: dict[int, int] = {} _stacked_subplots_offsets = [] - if self.subplots != False & self.stacked: - sub_range = range(len(self.subplots)) # type:ignore[arg-type] + self.subplots: list[Any] + if 
self.subplots: + subplots_flag = 1 + else: + subplots_flag = 0 + + if subplots_flag & self.stacked: + sub_range = range(len(self.subplots)) ss_temp = { - # mypy thinks self.subplots is a bool :( - x: self.subplots[x] # type:ignore[index] - for x in sub_range - if len(self.subplots[x]) > 1 # type:ignore[index] + x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 } for k, v in ss_temp.items(): for x in v: