From 6a9eb519465ce108aa1fc3906dc7c2f159c44141 Mon Sep 17 00:00:00 2001 From: Nis Martensen Date: Sat, 10 Jun 2017 15:36:24 +0200 Subject: [PATCH 1/6] TST: Add lineplot tests with unsorted x data Two new tests check interaction of non-monotonic x data and xlims: test_frame / test_unsorted_index_lims test_series / test_unsorted_index_xlim --- pandas/tests/plotting/test_frame.py | 23 +++++++++++++++++++++++ pandas/tests/plotting/test_series.py | 10 ++++++++++ 2 files changed, 33 insertions(+) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index f3b287a8889c3..186100cdcb345 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -304,6 +304,29 @@ def test_unsorted_index(self): rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name='y') tm.assert_series_equal(rs, df.y) + def test_unsorted_index_lims(self): + df = DataFrame({'y': [0., 1., 2., 3.]}, index=[1., 0., 3., 2.]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame({'y': [0., 1., np.nan, 3., 4., 5., 6.]}, + index=[1., 0., 3., 2., np.nan, 3., 2.]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame({'y': [0., 1., 2., 3.], 'z': [91., 90., 93., 92.]}) + ax = df.plot(x='z', y='y') + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + @pytest.mark.slow def test_subplots(self): df = DataFrame(np.random.rand(10, 3), diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 8164ad74a190a..6b337649d4595 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -279,6 +279,16 @@ def 
test_irregular_datetime(self): ax.set_xlim('1/1/1999', '1/1/2001') assert xp == ax.get_xlim()[0] + def test_unsorted_index_xlim(self): + ser = Series([0., 1., np.nan, 3., 4., 5., 6.], + index=[1., 0., 3., 2., np.nan, 3., 2.]) + _, ax = self.plt.subplots() + ax = ser.plot(ax=ax) + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data(orig=False)[0]) + assert xmax >= np.nanmax(lines[0].get_data(orig=False)[0]) + @pytest.mark.slow def test_pie_series(self): # if sum of values is less than 1.0, pie handle them as rate and draw From 603b5f15cead9518ecb54ac479ea46c8c204e3e1 Mon Sep 17 00:00:00 2001 From: Nis Martensen Date: Mon, 5 Jun 2017 09:40:36 +0200 Subject: [PATCH 2/6] BUG: set correct xlims for lines (#11471, #11310) * Do not assume that xdata is sorted. * Use numpy.nanmin() and numpy.nanmax() instead. --- pandas/plotting/_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py index c734855bdc09a..047a57ead72f8 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -361,8 +361,8 @@ def _get_xlim(lines): left, right = np.inf, -np.inf for l in lines: x = l.get_xdata(orig=False) - left = min(x[0], left) - right = max(x[-1], right) + left = min(np.nanmin(x), left) + right = max(np.nanmax(x), right) return left, right From 0a7bbbe94737091e53fbcad53aadef74f4ba3222 Mon Sep 17 00:00:00 2001 From: Nis Martensen Date: Mon, 5 Jun 2017 09:54:27 +0200 Subject: [PATCH 3/6] BUG: Let new MPL automatically determine xlims (#15495) * Avoid setting xlims since recent matplotlib already does it correctly * and we should let it apply its default styles where possible --- pandas/plotting/_core.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a0b7e93efd05c..c84ed88c6ec23 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -31,7 +31,8 @@ from 
pandas.util._decorators import Appender from pandas.plotting._compat import (_mpl_ge_1_3_1, - _mpl_ge_1_5_0) + _mpl_ge_1_5_0, + _mpl_ge_2_0_0) from pandas.plotting._style import (mpl_stylesheet, plot_params, _get_standard_colors) from pandas.plotting._tools import (_subplots, _flatten, table, @@ -969,9 +970,10 @@ def _make_plot(self): **kwds) self._add_legend_handle(newlines[0], label, index=i) - lines = _get_all_lines(ax) - left, right = _get_xlim(lines) - ax.set_xlim(left, right) + if not _mpl_ge_2_0_0(): + lines = _get_all_lines(ax) + left, right = _get_xlim(lines) + ax.set_xlim(left, right) @classmethod def _plot(cls, ax, x, y, style=None, column_num=None, From e68580a39a78828e07f289acd9db70b07f19bf16 Mon Sep 17 00:00:00 2001 From: Nis Martensen Date: Sat, 10 Jun 2017 14:32:40 +0200 Subject: [PATCH 4/6] TST: plotting: update expected results for matplotlib 2 Matplotlib 2.0 uses new defaults that cause some of our tests to fail. This adds appropriate new sets of expected results to the following tests in tests/plotting/test_datetimelike.py: test_finder_daily test_finder_quarterly test_finder_annual test_finder_hourly test_finder_minutely test_finder_monthly test_format_timedelta_ticks_narrow test_format_timedelta_ticks_wide --- pandas/tests/plotting/test_datetimelike.py | 100 ++++++++++++++++----- 1 file changed, 77 insertions(+), 23 deletions(-) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index eb10e70f4189b..6e743489b67be 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -386,9 +386,16 @@ def test_get_finder(self): @pytest.mark.slow def test_finder_daily(self): - xp = Period('1999-1-1', freq='B').ordinal day_lst = [10, 40, 252, 400, 950, 2750, 10000] - for n in day_lst: + + if self.mpl_ge_2_0_0: + xpl1 = [7565, 7564, 7553, 7546, 7518, 7428, 7066] + xpl2 = [7566, 7564, 7554, 7546, 7519, 7429, 7066] + else: + xpl1 = xpl2 = [Period('1999-1-1', 
freq='B').ordinal] * len(day_lst) + + for i, n in enumerate(day_lst): + xp = xpl1[i] rng = bdate_range('1999-1-1', periods=n) ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() @@ -396,6 +403,7 @@ def test_finder_daily(self): xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert xp == rs + xp = xpl2[i] vmin, vmax = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] @@ -404,9 +412,16 @@ def test_finder_daily(self): @pytest.mark.slow def test_finder_quarterly(self): - xp = Period('1988Q1').ordinal yrs = [3.5, 11] - for n in yrs: + + if self.mpl_ge_2_0_0: + xpl1 = [68, 68] + xpl2 = [72, 68] + else: + xpl1 = xpl2 = [Period('1988Q1').ordinal] * len(yrs) + + for i, n in enumerate(yrs): + xp = xpl1[i] rng = period_range('1987Q2', periods=int(n * 4), freq='Q') ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() @@ -414,6 +429,7 @@ def test_finder_quarterly(self): xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == xp + xp = xpl2[i] (vmin, vmax) = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] @@ -422,9 +438,16 @@ def test_finder_quarterly(self): @pytest.mark.slow def test_finder_monthly(self): - xp = Period('Jan 1988').ordinal yrs = [1.15, 2.5, 4, 11] - for n in yrs: + + if self.mpl_ge_2_0_0: + xpl1 = [216, 216, 204, 204] + xpl2 = [216, 216, 216, 204] + else: + xpl1 = xpl2 = [Period('Jan 1988').ordinal] * len(yrs) + + for i, n in enumerate(yrs): + xp = xpl1[i] rng = period_range('1987Q2', periods=int(n * 12), freq='M') ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() @@ -432,6 +455,7 @@ def test_finder_monthly(self): xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] assert rs == xp + xp = xpl2[i] vmin, vmax = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] @@ -450,7 +474,11 @@ def test_finder_monthly_long(self): @pytest.mark.slow def test_finder_annual(self): - xp = [1987, 1988, 1990, 
1990, 1995, 2020, 2070, 2170] + if self.mpl_ge_2_0_0: + xp = [1986, 1986, 1990, 1990, 1995, 2020, 1970, 1970] + else: + xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] + for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): rng = period_range('1987', periods=nyears, freq='A') ser = Series(np.random.randn(len(rng)), rng) @@ -470,7 +498,10 @@ def test_finder_minutely(self): ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - xp = Period('1/1/1999', freq='Min').ordinal + if self.mpl_ge_2_0_0: + xp = Period('1998-12-29 12:00', freq='Min').ordinal + else: + xp = Period('1/1/1999', freq='Min').ordinal assert rs == xp def test_finder_hourly(self): @@ -481,7 +512,10 @@ def test_finder_hourly(self): ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - xp = Period('1/1/1999', freq='H').ordinal + if self.mpl_ge_2_0_0: + xp = Period('1998-12-31 22:00', freq='H').ordinal + else: + xp = Period('1/1/1999', freq='H').ordinal assert rs == xp @pytest.mark.slow @@ -1294,9 +1328,14 @@ def test_format_timedelta_ticks_narrow(self): if is_platform_mac(): pytest.skip("skip on mac for precision display issue on older mpl") - expected_labels = [ - '00:00:00.00000000{:d}'.format(i) - for i in range(10)] + if self.mpl_ge_2_0_0: + expected_labels = [''] + [ + '00:00:00.00000000{:d}'.format(2 * i) + for i in range(5)] + [''] + else: + expected_labels = [ + '00:00:00.00000000{:d}'.format(i) + for i in range(10)] rng = timedelta_range('0', periods=10, freq='ns') df = DataFrame(np.random.randn(len(rng), 3), rng) @@ -1312,17 +1351,32 @@ def test_format_timedelta_ticks_wide(self): if is_platform_mac(): pytest.skip("skip on mac for precision display issue on older mpl") - expected_labels = [ - '00:00:00', - '1 days 03:46:40', - '2 days 07:33:20', - '3 days 11:20:00', - '4 days 15:06:40', - '5 days 18:53:20', - '6 days 22:40:00', - '8 days 02:26:40', - '' - ] + if self.mpl_ge_2_0_0: + expected_labels = [ + '', + '00:00:00', + '1 days 
03:46:40', + '2 days 07:33:20', + '3 days 11:20:00', + '4 days 15:06:40', + '5 days 18:53:20', + '6 days 22:40:00', + '8 days 02:26:40', + '9 days 06:13:20', + '' + ] + else: + expected_labels = [ + '00:00:00', + '1 days 03:46:40', + '2 days 07:33:20', + '3 days 11:20:00', + '4 days 15:06:40', + '5 days 18:53:20', + '6 days 22:40:00', + '8 days 02:26:40', + '' + ] rng = timedelta_range('0', periods=10, freq='1 d') df = DataFrame(np.random.randn(len(rng), 3), rng) From c440bc974ddef06727094f310a4cd8108219e78b Mon Sep 17 00:00:00 2001 From: Nis Martensen Date: Sat, 10 Jun 2017 14:55:56 +0200 Subject: [PATCH 5/6] TST: plotting: Relax some tests to work with matplotlib 2.0 Matplotlib 2.0 by default now adds some padding between the boundaries of the data and the boundaries of the plot. This causes some of our tests to fail if we don't relax them slightly. modified: pandas/tests/plotting/test_datetimelike.py test_irregular_ts_shared_ax_xlim test_mixed_freq_regular_first test_mixed_freq_regular_first_df test_secondary_y_irregular_ts_xlim test_secondary_y_non_ts_xlim test_secondary_y_regular_ts_xlim modified: pandas/tests/plotting/test_frame.py test_area_lim test_line_lim modified: pandas/tests/plotting/test_series.py test_ts_area_lim test_ts_line_lim --- pandas/tests/plotting/test_datetimelike.py | 20 +++++++++--------- pandas/tests/plotting/test_frame.py | 16 +++++++-------- pandas/tests/plotting/test_series.py | 24 +++++++++++----------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 6e743489b67be..d66012e2a56a0 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -699,8 +699,8 @@ def test_mixed_freq_regular_first(self): assert idx2.equals(s2.index.to_period('B')) left, right = ax2.get_xlim() pidx = s1.index.to_period() - assert left == pidx[0].ordinal - assert right == pidx[-1].ordinal + assert left <= 
pidx[0].ordinal + assert right >= pidx[-1].ordinal @pytest.mark.slow def test_mixed_freq_irregular_first(self): @@ -730,8 +730,8 @@ def test_mixed_freq_regular_first_df(self): assert idx2.equals(s2.index.to_period('B')) left, right = ax2.get_xlim() pidx = s1.index.to_period() - assert left == pidx[0].ordinal - assert right == pidx[-1].ordinal + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal @pytest.mark.slow def test_mixed_freq_irregular_first_df(self): @@ -1245,8 +1245,8 @@ def test_irregular_ts_shared_ax_xlim(self): # check that axis limits are correct left, right = ax.get_xlim() - assert left == ts_irregular.index.min().toordinal() - assert right == ts_irregular.index.max().toordinal() + assert left <= ts_irregular.index.min().toordinal() + assert right >= ts_irregular.index.max().toordinal() @pytest.mark.slow def test_secondary_y_non_ts_xlim(self): @@ -1262,7 +1262,7 @@ def test_secondary_y_non_ts_xlim(self): s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() - assert left_before == left_after + assert left_before >= left_after assert right_before < right_after @pytest.mark.slow @@ -1279,7 +1279,7 @@ def test_secondary_y_regular_ts_xlim(self): s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() - assert left_before == left_after + assert left_before >= left_after assert right_before < right_after @pytest.mark.slow @@ -1312,8 +1312,8 @@ def test_secondary_y_irregular_ts_xlim(self): ts_irregular[:5].plot(ax=ax) left, right = ax.get_xlim() - assert left == ts_irregular.index.min().toordinal() - assert right == ts_irregular.index.max().toordinal() + assert left <= ts_irregular.index.min().toordinal() + assert right >= ts_irregular.index.max().toordinal() def test_plot_outofbounds_datetime(self): # 2579 - checking this does not raise diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 186100cdcb345..03b7fdfe3c729 100644 --- a/pandas/tests/plotting/test_frame.py +++ 
b/pandas/tests/plotting/test_frame.py @@ -758,14 +758,14 @@ def test_line_lim(self): ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() - assert xmin == lines[0].get_data()[0][0] - assert xmax == lines[0].get_data()[0][-1] + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] ax = df.plot(secondary_y=True) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - assert xmin == lines[0].get_data()[0][0] - assert xmax == lines[0].get_data()[0][-1] + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] axes = df.plot(secondary_y=True, subplots=True) self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) @@ -774,8 +774,8 @@ def test_line_lim(self): assert not hasattr(ax, 'right_ax') xmin, xmax = ax.get_xlim() lines = ax.get_lines() - assert xmin == lines[0].get_data()[0][0] - assert xmax == lines[0].get_data()[0][-1] + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): df = DataFrame(rand(6, 4), columns=['x', 'y', 'z', 'four']) @@ -786,8 +786,8 @@ def test_area_lim(self): xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() lines = ax.get_lines() - assert xmin == lines[0].get_data()[0][0] - assert xmax == lines[0].get_data()[0][-1] + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] assert ymin == 0 ax = _check_plot_works(neg_df.plot.area, stacked=stacked) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 6b337649d4595..851c366f136dc 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -102,23 +102,23 @@ def test_ts_line_lim(self): ax = self.ts.plot(ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - assert xmin == lines[0].get_data(orig=False)[0][0] - assert xmax == lines[0].get_data(orig=False)[0][-1] + assert xmin <= lines[0].get_data(orig=False)[0][0] + assert xmax >= lines[0].get_data(orig=False)[0][-1] 
tm.close() ax = self.ts.plot(secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - assert xmin == lines[0].get_data(orig=False)[0][0] - assert xmax == lines[0].get_data(orig=False)[0][-1] + assert xmin <= lines[0].get_data(orig=False)[0][0] + assert xmax >= lines[0].get_data(orig=False)[0][-1] def test_ts_area_lim(self): _, ax = self.plt.subplots() ax = self.ts.plot.area(stacked=False, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - assert xmin == line[0] - assert xmax == line[-1] + assert xmin <= line[0] + assert xmax >= line[-1] tm.close() # GH 7471 @@ -126,8 +126,8 @@ def test_ts_area_lim(self): ax = self.ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - assert xmin == line[0] - assert xmax == line[-1] + assert xmin <= line[0] + assert xmax >= line[-1] tm.close() tz_ts = self.ts.copy() @@ -136,16 +136,16 @@ def test_ts_area_lim(self): ax = tz_ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - assert xmin == line[0] - assert xmax == line[-1] + assert xmin <= line[0] + assert xmax >= line[-1] tm.close() _, ax = self.plt.subplots() ax = tz_ts.plot.area(stacked=False, secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - assert xmin == line[0] - assert xmax == line[-1] + assert xmin <= line[0] + assert xmax >= line[-1] def test_label(self): s = Series([1, 2]) From a46859852a6daa480f8483aec1d7b5fad166c594 Mon Sep 17 00:00:00 2001 From: Nis Martensen Date: Wed, 19 Jul 2017 18:52:44 +0200 Subject: [PATCH 6/6] DOC: lineplot/xlims whatsnew entry for v0.21.0 --- doc/source/whatsnew/v0.21.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 52e056103cbdc..ae4cfb7d1cca5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ 
b/doc/source/whatsnew/v0.21.0.txt @@ -522,6 +522,8 @@ Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) - Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) +- Line plots no longer assume monotonic x data when calculating xlims; they now show the entire lines even for unsorted x data. (:issue:`11310`, :issue:`11471`) +- With matplotlib 2.0.0 and above, calculation of x limits for line plots is left to matplotlib, so that its new default settings are applied. (:issue:`15495`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^