Skip to content

BUG: weights is not working for multiple columns in df.plot.hist #33440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 10, 2020
Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ Plotting
^^^^^^^^

- :func:`.plot` for line/bar now accepts color by dictionary (:issue:`8193`).
-
- Bug in :meth:`DataFrame.plot.hist` where weights are not working for multiple columns (:issue:`33173`)
- Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` lost color attributes of ``boxprops``, ``whiskerprops``, ``capprops`` and ``medianprops`` (:issue:`30346`)


Expand Down
13 changes: 9 additions & 4 deletions pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,7 @@ def _args_adjust(self):
values = values[~isna(values)]

_, self.bins = np.histogram(
values,
bins=self.bins,
range=self.kwds.get("range", None),
weights=self.kwds.get("weights", None),
values, bins=self.bins, range=self.kwds.get("range", None)
)

if is_list_like(self.bottom):
Expand Down Expand Up @@ -77,6 +74,14 @@ def _make_plot(self):
kwds["style"] = style

kwds = self._make_plot_keywords(kwds, y)

# We allow weights to be a multi-dimensional array, e.g. a (10, 2) array,
# and each column sub-array of shape (10,) is used in its own iteration. If
# users provide only a 1D array, the same weights are used for all iterations.
weights = kwds.get("weights", None)
if weights is not None and np.ndim(weights) != 1:
kwds["weights"] = weights[:, i]

artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
self._add_legend_handle(artists[0], label, index=i)

Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/plotting/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1682,6 +1682,25 @@ def test_hist_df(self):
axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal")
self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8)

@pytest.mark.parametrize(
    "weights",
    [
        0.1 * np.ones(shape=(100,)),
        0.1 * np.ones(shape=(100, 2)),
    ],
)
def test_hist_weights(self, weights):
    """Check that 1D and 2D ``weights`` scale histogram bar heights (GH 33173)."""
    np.random.seed(0)
    samples = np.random.randn(2, 100)
    df = pd.DataFrame({"A": samples[0], "B": samples[1]})

    weighted_ax = _check_plot_works(df.plot, kind="hist", weights=weights)
    plain_ax = _check_plot_works(df.plot, kind="hist")

    # Every weight is 0.1, so each weighted bar should be ~0.1x its unweighted height.
    observed = [bar.get_height() for bar in weighted_ax.patches]
    expected = [0.1 * bar.get_height() for bar in plain_ax.patches]

    tm.assert_almost_equal(observed, expected)

def _check_box_coord(
self,
patches,
Expand Down