Skip to content

Inconsistent indexes for tick label plotting #28733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Nov 21, 2020
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ Plotting
^^^^^^^^

- Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`)
- Bug in :func:`DataFrame.plot.bar` and :func:`Series.plot.bar` where tick positions were assigned by value order instead of using the actual values for a numeric index, or a smart ordering for a string index (:issue:`26186`, :issue:`11465`)

Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
25 changes: 21 additions & 4 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,6 @@ def __init__(self, data, **kwargs):
self.bar_width = kwargs.pop("width", 0.5)
pos = kwargs.pop("position", 0.5)
kwargs.setdefault("align", "center")
self.tick_pos = np.arange(len(data))

self.bottom = kwargs.pop("bottom", 0)
self.left = kwargs.pop("left", 0)
Expand All @@ -1356,7 +1355,16 @@ def __init__(self, data, **kwargs):
self.tickoffset = self.bar_width * pos
self.lim_offset = 0

self.ax_pos = self.tick_pos - self.tickoffset
if isinstance(self.data.index, ABCMultiIndex):
if kwargs["ax"] is not None and kwargs["ax"].has_data():
warnings.warn(
"Redrawing a bar plot with a MultiIndex is not supported "
+ "and may lead to inconsistent label positions.",
UserWarning,
)
self.ax_index = np.arange(len(data))
else:
self.ax_index = self.data.index

def _args_adjust(self):
if is_list_like(self.bottom):
Expand All @@ -1383,6 +1391,15 @@ def _make_plot(self):

for i, (label, y) in enumerate(self._iter_data(fillna=0)):
ax = self._get_ax(i)

if self.orientation == "vertical":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any assert in orientation that it takes on only certain values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No assert. Orientation is never a parameter. It is defined by the BarPlot class as "vertical", and overridden by the BarhPlot class as "horizontal". It is also defined in the LinePlot class.
Globally, it is used by the _post_plot_logic_common method from the base class MPLPlot.

ax.xaxis.update_units(self.ax_index)
self.tick_pos = ax.convert_xunits(self.ax_index).astype(np.int)
elif self.orientation == "horizontal":
ax.yaxis.update_units(self.ax_index)
self.tick_pos = ax.convert_yunits(self.ax_index).astype(np.int)
self.ax_pos = self.tick_pos - self.tickoffset

kwds = self.kwds.copy()
if self._is_series:
kwds["color"] = colors
Expand Down Expand Up @@ -1454,8 +1471,8 @@ def _post_plot_logic(self, ax: "Axes", data):
str_index = [pprint_thing(key) for key in range(data.shape[0])]
name = self._get_index_name()

s_edge = self.ax_pos[0] - 0.25 + self.lim_offset
e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset
s_edge = self.ax_pos.min() - 0.25 + self.lim_offset
e_edge = self.ax_pos.max() + 0.25 + self.bar_width + self.lim_offset

self._decorate_ticks(ax, name, str_index, s_edge, e_edge)

Expand Down
75 changes: 75 additions & 0 deletions pandas/tests/plotting/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3440,6 +3440,81 @@ def test_xlabel_ylabel_dataframe_subplots(
assert all(ax.get_ylabel() == str(new_label) for ax in axes)
assert all(ax.get_xlabel() == str(new_label) for ax in axes)

@pytest.mark.slow
@pytest.mark.parametrize("method", ["bar", "barh"])
def test_bar_ticklabel_consistence(self, method):
# Draw two consecutive bar plots with consistent ticklabels
# The label positions should not move between two drawings on the same axis
# GH: 26186
def get_main_axis(ax):
if method == "barh":
return ax.yaxis
elif method == "bar":
return ax.xaxis

# Plot the first bar plot
data = {"A": 0, "B": 3, "C": -4}
df = pd.DataFrame.from_dict(data, orient="index", columns=["Value"])
ax = getattr(df.plot, method)()
ax.get_figure().canvas.draw()

# Retrieve the label positions for the first drawing
xticklabels = [t.get_text() for t in get_main_axis(ax).get_ticklabels()]
label_positions_1 = dict(zip(xticklabels, get_main_axis(ax).get_ticklocs()))

# Modify the dataframe order and values and plot on same axis
df = df.sort_values("Value") * -2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you put a blank line between cases, plus a comment which explains what it is testing if not obvious

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added comments to make explicit what the test is doing and why.

ax = getattr(df.plot, method)(ax=ax, color="red")
ax.get_figure().canvas.draw()

# Retrieve the label positions for the second drawing
xticklabels = [t.get_text() for t in get_main_axis(ax).get_ticklabels()]
label_positions_2 = dict(zip(xticklabels, get_main_axis(ax).get_ticklocs()))

# Assert that the label positions did not change between the plotting
assert label_positions_1 == label_positions_2

def test_bar_numeric(self):
    """Check that bar ticks sit at the values of a numeric index.

    GH: 11465
    """
    frame = pd.DataFrame(np.random.rand(10), index=np.arange(10, 20))
    axes = frame.plot.bar()

    # Tick locations must equal the index values, not the positions 0..9.
    tm.assert_numpy_array_equal(
        axes.xaxis.get_ticklocs(), np.arange(10, 20, dtype=np.int64)
    )

def test_bar_multiindex(self):
    """Smoke-test bar plots on a MultiIndex and the warning on redraw.

    Mirrors the example in the "Plotting With Error Bars" section of
    pandas/doc/source/user_guide/visualization.rst.
    Related to issue GH: 26186
    """
    letters = ["a", "a", "a", "a", "b", "b", "b", "b"]
    words = ["foo", "foo", "bar", "bar", "foo", "foo", "bar", "bar"]
    index = pd.MultiIndex.from_arrays([letters, words], names=["letter", "word"])

    frame = pd.DataFrame(
        {"data1": [3, 2, 4, 3, 2, 4, 3, 2], "data2": [6, 5, 7, 5, 4, 5, 6, 5]},
        index=index,
    )

    # Per-group means are plotted; per-group standard deviations are
    # passed as the error bars.
    grouped = frame.groupby(level=("letter", "word"))
    group_means = grouped.mean()
    group_stds = grouped.std()

    # Nothing is asserted about the drawing itself: the first plot must
    # not warn, while plotting again on the same axes must emit a
    # UserWarning.
    with tm.assert_produces_warning(None):
        axes = group_means.plot.bar(yerr=group_stds, capsize=4)
    with tm.assert_produces_warning(UserWarning):
        group_means.plot.bar(yerr=group_stds, capsize=4, ax=axes)


def _generate_4_axes_via_gridspec():
import matplotlib as mpl
Expand Down