Skip to content

Commit 110406c

Browse files
author
Tom Augspurger
committed
Merge pull request #6691 from sinhrks/bar_center_pr
ENH/VIS: Dataframe bar plot can now handle align keyword properly
2 parents 5999ac8 + 06557db commit 110406c

File tree

4 files changed

+164
-99
lines changed

4 files changed

+164
-99
lines changed

doc/source/release.rst

+8-2
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,16 @@ API Changes
135135
the index, rather than requiring a list of tuple (:issue:`4370`)
136136

137137
- Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`)
138+
138139
- The following keywords are now accepted by :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
139-
- `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten.
140+
141+
- `width`: Specify the bar width. In previous versions, a static value of 0.5 was passed to matplotlib and could not be overridden. (:issue:`6604`)
142+
143+
- `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, so the `align` keyword was not applied as expected. (:issue:`4525`)
144+
140145
- `position`: Specify the relative alignment for the bar plot layout, from 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center). (:issue:`6604`)
141-
- Define and document the order of column vs index names in query/eval
146+
147+
- Define and document the order of column vs index names in query/eval
142148
(:issue:`6676`)
143149

144150
Deprecations

doc/source/v0.14.0.txt

+7-1
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,15 @@ These are out-of-bounds selections
174174
df_multi.set_index([df_multi.index, df_multi.index])
175175

176176
- The following keywords are now accepted by :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
177-
- `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten.
177+
178+
- `width`: Specify the bar width. In previous versions, a static value of 0.5 was passed to matplotlib and could not be overridden. (:issue:`6604`)
179+
180+
- `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, so the `align` keyword was not applied as expected. (:issue:`4525`)
181+
178182
- `position`: Specify the relative alignment for the bar plot layout, from 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center). (:issue:`6604`)
179183

184+
Because the default `align` value has changed, bar plot coordinates are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to place bar plots on the same coordinates as line plots. However, bar plots may look unexpectedly different if you manually adjust the bar location or drawing area, for example with `set_xlim`, `set_ylim`, etc. In such cases, please modify your script to match the new coordinates.
185+
180186

181187
MultiIndexing Using Slicers
182188
~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/tests/test_graphics.py

+126-85
Original file line numberDiff line numberDiff line change
@@ -615,47 +615,12 @@ def test_bar_barwidth(self):
615615
@slow
616616
def test_bar_barwidth_position(self):
617617
df = DataFrame(randn(5, 5))
618-
619-
width = 0.9
620-
position = 0.2
621-
622-
# regular
623-
ax = df.plot(kind='bar', width=width, position=position)
624-
p = ax.patches[0]
625-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
626-
p.get_x() + p.get_width() * position * len(df.columns))
627-
628-
# stacked
629-
ax = df.plot(kind='bar', stacked=True, width=width, position=position)
630-
p = ax.patches[0]
631-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
632-
p.get_x() + p.get_width() * position)
633-
634-
# horizontal regular
635-
ax = df.plot(kind='barh', width=width, position=position)
636-
p = ax.patches[0]
637-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
638-
p.get_y() + p.get_height() * position * len(df.columns))
639-
640-
# horizontal stacked
641-
ax = df.plot(kind='barh', stacked=True, width=width, position=position)
642-
p = ax.patches[0]
643-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
644-
p.get_y() + p.get_height() * position)
645-
646-
# subplots
647-
axes = df.plot(kind='bar', width=width, position=position, subplots=True)
648-
for ax in axes:
649-
p = ax.patches[0]
650-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
651-
p.get_x() + p.get_width() * position)
652-
653-
# horizontal subplots
654-
axes = df.plot(kind='barh', width=width, position=position, subplots=True)
655-
for ax in axes:
656-
p = ax.patches[0]
657-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
658-
p.get_y() + p.get_height() * position)
618+
self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, position=0.2)
619+
self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9, position=0.2)
620+
self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9, position=0.2)
621+
self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9, position=0.2)
622+
self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9, position=0.2)
623+
self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2)
659624

660625
@slow
661626
def test_plot_scatter(self):
@@ -692,68 +657,144 @@ def test_plot_bar(self):
692657
df = DataFrame({'a': [0, 1], 'b': [1, 0]})
693658
_check_plot_works(df.plot, kind='bar')
694659

660+
def _check_bar_alignment(self, df, kind='bar', stacked=False,
661+
subplots=False, align='center',
662+
width=0.5, position=0.5):
663+
664+
axes = df.plot(kind=kind, stacked=stacked, subplots=subplots,
665+
align=align, width=width, position=position,
666+
grid=True)
667+
668+
tick_pos = np.arange(len(df))
669+
670+
if not isinstance(axes, np.ndarray):
671+
axes = [axes]
672+
673+
for ax in axes:
674+
if kind == 'bar':
675+
axis = ax.xaxis
676+
ax_min, ax_max = ax.get_xlim()
677+
elif kind == 'barh':
678+
axis = ax.yaxis
679+
ax_min, ax_max = ax.get_ylim()
680+
else:
681+
raise ValueError
682+
683+
p = ax.patches[0]
684+
if kind == 'bar' and (stacked is True or subplots is True):
685+
edge = p.get_x()
686+
center = edge + p.get_width() * position
687+
tickoffset = width * position
688+
elif kind == 'bar' and stacked is False:
689+
center = p.get_x() + p.get_width() * len(df.columns) * position
690+
edge = p.get_x()
691+
if align == 'edge':
692+
tickoffset = width * (position - 0.5) + p.get_width() * 1.5
693+
else:
694+
tickoffset = width * position + p.get_width()
695+
elif kind == 'barh' and (stacked is True or subplots is True):
696+
center = p.get_y() + p.get_height() * position
697+
edge = p.get_y()
698+
tickoffset = width * position
699+
elif kind == 'barh' and stacked is False:
700+
center = p.get_y() + p.get_height() * len(df.columns) * position
701+
edge = p.get_y()
702+
if align == 'edge':
703+
tickoffset = width * (position - 0.5) + p.get_height() * 1.5
704+
else:
705+
tickoffset = width * position + p.get_height()
706+
else:
707+
raise ValueError
708+
709+
# Check the ticks locates on integer
710+
self.assertTrue((axis.get_ticklocs() == np.arange(len(df))).all())
711+
712+
if align == 'center':
713+
# Check whether the bar locates on center
714+
self.assertAlmostEqual(axis.get_ticklocs()[0], center)
715+
elif align == 'edge':
716+
# Check whether the bar's edge starts from the tick
717+
self.assertAlmostEqual(axis.get_ticklocs()[0], edge)
718+
else:
719+
raise ValueError
720+
721+
# Check starting point and axes limit margin
722+
self.assertEqual(ax_min, tick_pos[0] - tickoffset - 0.25)
723+
self.assertEqual(ax_max, tick_pos[-1] - tickoffset + 1)
724+
# Check tick locations and axes limit margin
725+
t_min = axis.get_ticklocs()[0] - tickoffset
726+
t_max = axis.get_ticklocs()[-1] - tickoffset
727+
self.assertAlmostEqual(ax_min, t_min - 0.25)
728+
self.assertAlmostEqual(ax_max, t_max + 1.0)
729+
return axes
730+
731+
@slow
695732
def test_bar_stacked_center(self):
696733
# GH2157
697734
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
698-
ax = df.plot(kind='bar', stacked='True', grid=True)
699-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
700-
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
735+
axes = self._check_bar_alignment(df, kind='bar', stacked=True)
736+
# Check the axes has the same drawing range before fixing # GH4525
737+
self.assertEqual(axes[0].get_xlim(), (-0.5, 4.75))
701738

702-
ax = df.plot(kind='bar', stacked='True', width=0.9, grid=True)
703-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
704-
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
739+
self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9)
705740

706-
ax = df.plot(kind='barh', stacked='True', grid=True)
707-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
708-
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
741+
axes = self._check_bar_alignment(df, kind='barh', stacked=True)
742+
self.assertEqual(axes[0].get_ylim(), (-0.5, 4.75))
709743

710-
ax = df.plot(kind='barh', stacked='True', width=0.9, grid=True)
711-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
712-
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
744+
self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9)
713745

746+
@slow
714747
def test_bar_center(self):
715748
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
716-
ax = df.plot(kind='bar', grid=True)
717-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
718-
ax.patches[0].get_x() + ax.patches[0].get_width())
719-
720-
ax = df.plot(kind='bar', width=0.9, grid=True)
721-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
722-
ax.patches[0].get_x() + ax.patches[0].get_width())
749+
axes = self._check_bar_alignment(df, kind='bar', stacked=False)
750+
self.assertEqual(axes[0].get_xlim(), (-0.75, 4.5))
723751

724-
ax = df.plot(kind='barh', grid=True)
725-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
726-
ax.patches[0].get_y() + ax.patches[0].get_height())
727-
728-
ax = df.plot(kind='barh', width=0.9, grid=True)
729-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
730-
ax.patches[0].get_y() + ax.patches[0].get_height())
752+
self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9)
753+
754+
axes = self._check_bar_alignment(df, kind='barh', stacked=False)
755+
self.assertEqual(axes[0].get_ylim(), (-0.75, 4.5))
731756

757+
self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9)
758+
759+
@slow
732760
def test_bar_subplots_center(self):
733761
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
734-
axes = df.plot(kind='bar', grid=True, subplots=True)
762+
axes = self._check_bar_alignment(df, kind='bar', subplots=True)
735763
for ax in axes:
736-
for r in ax.patches:
737-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
738-
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
764+
self.assertEqual(ax.get_xlim(), (-0.5, 4.75))
739765

740-
axes = df.plot(kind='bar', width=0.9, grid=True, subplots=True)
766+
self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9)
767+
768+
axes = self._check_bar_alignment(df, kind='barh', subplots=True)
741769
for ax in axes:
742-
for r in ax.patches:
743-
self.assertEqual(ax.xaxis.get_ticklocs()[0],
744-
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
770+
self.assertEqual(ax.get_ylim(), (-0.5, 4.75))
745771

746-
axes = df.plot(kind='barh', grid=True, subplots=True)
747-
for ax in axes:
748-
for r in ax.patches:
749-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
750-
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
772+
self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9)
751773

752-
axes = df.plot(kind='barh', width=0.9, grid=True, subplots=True)
753-
for ax in axes:
754-
for r in ax.patches:
755-
self.assertEqual(ax.yaxis.get_ticklocs()[0],
756-
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
774+
@slow
775+
def test_bar_edge(self):
776+
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
777+
778+
self._check_bar_alignment(df, kind='bar', stacked=True, align='edge')
779+
self._check_bar_alignment(df, kind='bar', stacked=True,
780+
width=0.9, align='edge')
781+
self._check_bar_alignment(df, kind='barh', stacked=True, align='edge')
782+
self._check_bar_alignment(df, kind='barh', stacked=True,
783+
width=0.9, align='edge')
784+
785+
self._check_bar_alignment(df, kind='bar', stacked=False, align='edge')
786+
self._check_bar_alignment(df, kind='bar', stacked=False,
787+
width=0.9, align='edge')
788+
self._check_bar_alignment(df, kind='barh', stacked=False, align='edge')
789+
self._check_bar_alignment(df, kind='barh', stacked=False,
790+
width=0.9, align='edge')
791+
792+
self._check_bar_alignment(df, kind='bar', subplots=True, align='edge')
793+
self._check_bar_alignment(df, kind='bar', subplots=True,
794+
width=0.9, align='edge')
795+
self._check_bar_alignment(df, kind='barh', subplots=True, align='edge')
796+
self._check_bar_alignment(df, kind='barh', subplots=True,
797+
width=0.9, align='edge')
757798

758799
@slow
759800
def test_bar_log_no_subplots(self):

pandas/tools/plotting.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -1673,18 +1673,26 @@ def __init__(self, data, **kwargs):
16731673
self.mark_right = kwargs.pop('mark_right', True)
16741674
self.stacked = kwargs.pop('stacked', False)
16751675

1676-
self.bar_width = kwargs.pop('width', 0.5)
1676+
self.bar_width = kwargs.pop('width', 0.5)
16771677
pos = kwargs.pop('position', 0.5)
1678-
self.ax_pos = np.arange(len(data)) + self.bar_width * pos
16791678

1679+
kwargs['align'] = kwargs.pop('align', 'center')
1680+
self.tick_pos = np.arange(len(data))
1681+
16801682
self.log = kwargs.pop('log',False)
16811683
MPLPlot.__init__(self, data, **kwargs)
16821684

16831685
if self.stacked or self.subplots:
1684-
self.tickoffset = self.bar_width * pos
1686+
self.tickoffset = self.bar_width * pos
1687+
elif kwargs['align'] == 'edge':
1688+
K = self.nseries
1689+
w = self.bar_width / K
1690+
self.tickoffset = self.bar_width * (pos - 0.5) + w * 1.5
16851691
else:
16861692
K = self.nseries
1687-
self.tickoffset = self.bar_width * pos + self.bar_width / K
1693+
w = self.bar_width / K
1694+
self.tickoffset = self.bar_width * pos + w
1695+
self.ax_pos = self.tick_pos - self.tickoffset
16881696

16891697
def _args_adjust(self):
16901698
if self.rot is None:
@@ -1751,19 +1759,21 @@ def _make_plot(self):
17511759
start = 0 if mpl_le_1_2_1 else None
17521760

17531761
if self.subplots:
1754-
rect = bar_f(ax, self.ax_pos, y, self.bar_width,
1762+
w = self.bar_width / 2
1763+
rect = bar_f(ax, self.ax_pos + w, y, self.bar_width,
17551764
start=start, **kwds)
17561765
ax.set_title(label)
17571766
elif self.stacked:
17581767
mask = y > 0
17591768
start = np.where(mask, pos_prior, neg_prior)
1760-
rect = bar_f(ax, self.ax_pos, y, self.bar_width, start=start,
1761-
label=label, **kwds)
1769+
w = self.bar_width / 2
1770+
rect = bar_f(ax, self.ax_pos + w, y, self.bar_width,
1771+
start=start, label=label, **kwds)
17621772
pos_prior = pos_prior + np.where(mask, y, 0)
17631773
neg_prior = neg_prior + np.where(mask, 0, y)
17641774
else:
17651775
w = self.bar_width / K
1766-
rect = bar_f(ax, self.ax_pos + (i + 1) * w, y, w,
1776+
rect = bar_f(ax, self.ax_pos + (i + 1.5) * w, y, w,
17671777
start=start, label=label, **kwds)
17681778
rects.append(rect)
17691779
if self.mark_right:
@@ -1789,22 +1799,24 @@ def _post_plot_logic(self):
17891799
name = self._get_index_name()
17901800
if self.kind == 'bar':
17911801
ax.set_xlim([self.ax_pos[0] - 0.25, self.ax_pos[-1] + 1])
1792-
ax.set_xticks(self.ax_pos + self.tickoffset)
1802+
ax.set_xticks(self.tick_pos)
17931803
ax.set_xticklabels(str_index, rotation=self.rot,
17941804
fontsize=self.fontsize)
17951805
if not self.log: # GH3254+
17961806
ax.axhline(0, color='k', linestyle='--')
17971807
if name is not None:
17981808
ax.set_xlabel(name)
1799-
else:
1809+
elif self.kind == 'barh':
18001810
# horizontal bars
18011811
ax.set_ylim([self.ax_pos[0] - 0.25, self.ax_pos[-1] + 1])
1802-
ax.set_yticks(self.ax_pos + self.tickoffset)
1812+
ax.set_yticks(self.tick_pos)
18031813
ax.set_yticklabels(str_index, rotation=self.rot,
18041814
fontsize=self.fontsize)
18051815
ax.axvline(0, color='k', linestyle='--')
18061816
if name is not None:
18071817
ax.set_ylabel(name)
1818+
else:
1819+
raise NotImplementedError(self.kind)
18081820

18091821
# if self.subplots and self.legend:
18101822
# self.axes[0].legend(loc='best')

0 commit comments

Comments
 (0)