From 19d64feed566cea20c1938398b8db2ef75a1df13 Mon Sep 17 00:00:00 2001 From: MasonGallo Date: Mon, 5 Mar 2018 14:47:01 -0500 Subject: [PATCH 1/6] Add support for list-like y argument --- pandas/plotting/_core.py | 29 ++++++++++++++++++++++------- pandas/tests/plotting/test_frame.py | 17 +++++++++++++---- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b15c5271ae321..d38476eb54647 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1714,13 +1714,28 @@ def _plot(data, x=None, y=None, subplots=False, data = data.set_index(x) if y is not None: - if is_integer(y) and not data.columns.holds_integer(): + int_cols = is_integer(y) or any(is_integer(col) for col in y) + if int_cols and not data.columns.holds_integer(): y = data.columns[y] - elif not isinstance(data[y], ABCSeries): - raise ValueError("y must be a label or position") - label = kwds['label'] if 'label' in kwds else y - series = data[y].copy() # Don't modify - series.name = label + elif not isinstance(data[y], (ABCSeries, ABCDataFrame)): + raise ValueError( + "y must be a label or position or list of them" + ) + + label_kw = kwds['label'] if 'label' in kwds else False + new_data = data[y].copy() # Don't modify + + if isinstance(data[y], ABCSeries): + label_name = label_kw or y + new_data.name = label_name + else: + match = is_list_like(label_kw) and len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y" + ) + label_name = label_kw or data[y].columns + new_data.columns = label_name for kw in ['xerr', 'yerr']: if (kw in kwds) and \ @@ -1730,7 +1745,7 @@ def _plot(data, x=None, y=None, subplots=False, kwds[kw] = data[kwds[kw]] except (IndexError, KeyError, TypeError): pass - data = series + data = new_data plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) plot_obj.generate() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 3d25b0b51e052..5e2a974ca8e14 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2172,24 +2172,33 @@ def test_invalid_kind(self): @pytest.mark.parametrize("x,y", [ (['B', 'C'], 'A'), - ('A', ['B', 'C']) + (['A'], ['B', 'C']) ]) def test_invalid_xy_args(self, x, y): - # GH 18671 + # GH 18671, 19699 allows y to be list-like but not x df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) with pytest.raises(ValueError): df.plot(x=x, y=y) @pytest.mark.parametrize("x,y", [ ('A', 'B'), - ('B', 'A') + (['A'], 'B') ]) def test_invalid_xy_args_dup_cols(self, x, y): - # GH 18671 + # GH 18671, 19699 allows y to be list-like but not x df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list('AAB')) with pytest.raises(ValueError): df.plot(x=x, y=y) + @pytest.mark.parametrize("y,lbl", [ + (['B'], ['b']), + (['B', 'C'], ['b', 'c']) + ]) + def test_y_listlike(self, y, lbl): + # GH 19699 + df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) + _check_plot_works(df.plot, x='A', y=y, label=lbl) + @pytest.mark.slow def test_hexbin_basic(self): df = self.hexbin_df From 836133825278ce45cb7d94f09c9083d4b7fc8810 Mon Sep 17 00:00:00 2001 From: MasonGallo Date: Mon, 5 Mar 2018 14:47:12 -0500 Subject: [PATCH 2/6] update whatsnew --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a976fca66a57a..0f7f39a0d37d6 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -965,6 +965,7 @@ Plotting ^^^^^^^^ - :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) +- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). - :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). From 20a2dc0412fd07d42897edd3dfe7dcfcdb46ede1 Mon Sep 17 00:00:00 2001 From: MasonGallo Date: Mon, 5 Mar 2018 14:59:49 -0500 Subject: [PATCH 3/6] add doc change for y --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index d38476eb54647..2fd741f105359 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1758,7 +1758,7 @@ def _plot(data, x=None, y=None, subplots=False, series_kind = "" df_coord = """x : label or position, default None - y : label or position, default None + y : label, position or list of label, positions, default None Allows plotting of one column versus another""" series_coord = "" From 9535b4cb2fcb410467f1af0ff1bd3eb6a79efe14 Mon Sep 17 00:00:00 2001 From: MasonGallo Date: Thu, 8 Mar 2018 17:03:44 -0500 Subject: [PATCH 4/6] Add test cases and fix position args --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/plotting/_core.py | 9 ++++---- pandas/tests/plotting/test_frame.py | 34 +++++++++++++++++++++-------- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 0f7f39a0d37d6..bd82f18a9e786 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -966,6 +966,7 @@ Plotting - :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) - :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) +- Bug in :func:`DataFrame.plot` with ``x`` or ``y`` arguments as positions (:issue:`20056`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). - :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2fd741f105359..9a22658c7b53c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1706,15 +1706,16 @@ def _plot(data, x=None, y=None, subplots=False, plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) else: if isinstance(data, ABCDataFrame): + new_data = data.copy() # don't modify until necessary if x is not None: if is_integer(x) and not data.columns.holds_integer(): x = data.columns[x] elif not isinstance(data[x], ABCSeries): raise ValueError("x must be a label or position") - data = data.set_index(x) + new_data = data.set_index(x) if y is not None: - int_cols = is_integer(y) or any(is_integer(col) for col in y) + int_cols = is_integer(y) or all(is_integer(col) for col in y) if int_cols and not data.columns.holds_integer(): y = data.columns[y] elif not isinstance(data[y], (ABCSeries, ABCDataFrame)): @@ -1723,7 +1724,7 @@ def _plot(data, x=None, y=None, subplots=False, ) label_kw = kwds['label'] if 'label' in kwds else False - new_data = data[y].copy() # Don't modify + new_data = new_data[y].copy() if isinstance(data[y], ABCSeries): label_name = label_kw or y @@ -1745,7 +1746,7 @@ def _plot(data, x=None, y=None, subplots=False, kwds[kw] = data[kwds[kw]] except (IndexError, KeyError, TypeError): pass - data = new_data + data = new_data plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) plot_obj.generate() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 5e2a974ca8e14..d41cc6bc195f6 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2170,15 +2170,16 @@ def test_invalid_kind(self): with pytest.raises(ValueError): df.plot(kind='aasdf') - @pytest.mark.parametrize("x,y", [ - (['B', 'C'], 'A'), - (['A'], ['B', 'C']) + @pytest.mark.parametrize("x,y,lbl", [ + (['B', 'C'], 'A', 'a'), + (['A'], ['B', 'C'], ['b', 'c']), + ('A', ['B', 'C'], 'badlabel') ]) - def test_invalid_xy_args(self, x, y): + def test_invalid_xy_args(self, x, y, lbl): # GH 18671, 19699 allows y to be list-like but not x df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) with pytest.raises(ValueError): - df.plot(x=x, y=y) + df.plot(x=x, y=y, label=lbl) @pytest.mark.parametrize("x,y", [ ('A', 'B'), @@ -2190,15 +2191,30 @@ def test_invalid_xy_args_dup_cols(self, x, y): with pytest.raises(ValueError): df.plot(x=x, y=y) - @pytest.mark.parametrize("y,lbl", [ - (['B'], ['b']), - (['B', 'C'], ['b', 'c']) + @pytest.mark.parametrize("x,y,lbl,colors", [ + ('A', ['B'], ['b'], ['red']), + ('A', ['B', 'C'], ['b', 'c'], ['red', 'blue']), + (0, [1, 2], ['bokeh', 'cython'], ['green', 'yellow']) ]) - def test_y_listlike(self, y, lbl): + def test_y_listlike(self, x, y, lbl, colors): # GH 19699 df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) _check_plot_works(df.plot, x='A', y=y, label=lbl) + ax = df.plot(x=x, y=y, label=lbl, color=colors) + assert len(ax.lines) == len(y) + self._check_colors(ax.get_lines(), linecolors=colors) + + @pytest.mark.parametrize("x,y,colnames", [ + (0, 1, ['A', 'B']), + (1, 0, [0, 1]) + ]) + def test_xy_args_integer(self, x, y, colnames): + # GH 20056 + df = DataFrame({"A": [1, 2], 'B': [3, 4]}) + df.columns = colnames + _check_plot_works(df.plot, x=x, y=y) + @pytest.mark.slow def test_hexbin_basic(self): df = self.hexbin_df From 7d7d74e6f535b132c839be9ce8a0de307043d58b Mon Sep 17 00:00:00 2001 From: MasonGallo Date: Mon, 19 Mar 2018 11:54:55 -0400 Subject: [PATCH 5/6] don't copy save cols ahead of time and update whatsnew --- doc/source/whatsnew/v0.23.0.txt | 4 ++-- pandas/plotting/_core.py | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bd82f18a9e786..314cee65f879d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -965,8 +965,7 @@ Plotting ^^^^^^^^ - :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) -- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) -- Bug in :func:`DataFrame.plot` with ``x`` or ``y`` arguments as positions (:issue:`20056`) +- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). - :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). @@ -1021,3 +1020,4 @@ Other - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`) +- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9a22658c7b53c..81c5235138457 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1706,25 +1706,24 @@ def _plot(data, x=None, y=None, subplots=False, plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) else: if isinstance(data, ABCDataFrame): - new_data = data.copy() # don't modify until necessary + data_cols = data.columns if x is not None: if is_integer(x) and not data.columns.holds_integer(): - x = data.columns[x] + x = data_cols[x] elif not isinstance(data[x], ABCSeries): raise ValueError("x must be a label or position") - new_data = data.set_index(x) + data = data.set_index(x) if y is not None: - int_cols = is_integer(y) or all(is_integer(col) for col in y) - if int_cols and not data.columns.holds_integer(): - y = data.columns[y] + int_y = is_integer(y) or all(is_integer(c) for c in y) + if int_y and not data.columns.holds_integer(): + y = data_cols[y] elif not isinstance(data[y], (ABCSeries, ABCDataFrame)): raise ValueError( "y must be a label or position or list of them" ) - label_kw = kwds['label'] if 'label' in kwds else False - new_data = new_data[y].copy() + new_data = data[y].copy() if isinstance(data[y], ABCSeries): label_name = label_kw or y @@ -1746,7 +1745,7 @@ def _plot(data, x=None, y=None, subplots=False, kwds[kw] = data[kwds[kw]] except (IndexError, KeyError, TypeError): pass - data = new_data + data = new_data plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) plot_obj.generate() From 37a6eca220ee3314b6a3917546618d0e68da0ff8 Mon Sep 17 00:00:00 2001 From: MasonGallo Date: Tue, 20 Mar 2018 18:58:40 -0400 Subject: [PATCH 6/6] address fdbck --- pandas/plotting/_core.py | 40 ++++++++++++++--------------- pandas/tests/plotting/test_frame.py | 4 +-- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 1128aee252604..21b69522523cc 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1760,37 +1760,37 @@ def _plot(data, x=None, y=None, subplots=False, data = data.set_index(x) if y is not None: - int_y = is_integer(y) or all(is_integer(c) for c in y) - if int_y and not data.columns.holds_integer(): + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not data.columns.holds_integer(): y = data_cols[y] - elif not isinstance(data[y], (ABCSeries, ABCDataFrame)): - raise ValueError( - "y must be a label or position or list of them" - ) + label_kw = kwds['label'] if 'label' in kwds else False - new_data = data[y].copy() + for kw in ['xerr', 'yerr']: + if (kw in kwds) and \ + (isinstance(kwds[kw], string_types) or + is_integer(kwds[kw])): + try: + kwds[kw] = data[kwds[kw]] + except (IndexError, KeyError, TypeError): + pass - if isinstance(data[y], ABCSeries): + # don't overwrite + data = data[y].copy() + + if isinstance(data, ABCSeries): label_name = label_kw or y - new_data.name = label_name + data.name = label_name else: match = is_list_like(label_kw) and len(label_kw) == len(y) if label_kw and not match: raise ValueError( "label should be list-like and same length as y" ) - label_name = label_kw or data[y].columns - new_data.columns = label_name + label_name = label_kw or data.columns + data.columns = label_name - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], string_types) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - data = new_data plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) plot_obj.generate() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 5f72665719fad..ac02f5f4e4283 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2197,7 +2197,7 @@ def test_invalid_xy_args_dup_cols(self, x, y): (0, [1, 2], ['bokeh', 'cython'], ['green', 'yellow']) ]) def test_y_listlike(self, x, y, lbl, colors): - # GH 19699 + # GH 19699: tests list-like y and verifies lbls & colors df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) _check_plot_works(df.plot, x='A', y=y, label=lbl) @@ -2210,7 +2210,7 @@ def test_y_listlike(self, x, y, lbl, colors): (1, 0, [0, 1]) ]) def test_xy_args_integer(self, x, y, colnames): - # GH 20056 + # GH 20056: tests integer args for xy and checks col names df = DataFrame({"A": [1, 2], 'B': [3, 4]}) df.columns = colnames _check_plot_works(df.plot, x=x, y=y)