-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API & BUG: allow list-like y argument to df.plot & fix integer arg to x,y #20000
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
19d64fe
8361338
20a2dc0
9535b4c
7d7d74e
d6d824f
37a6eca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1751,21 +1751,36 @@ def _plot(data, x=None, y=None, subplots=False, | |
plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) | ||
else: | ||
if isinstance(data, ABCDataFrame): | ||
data_cols = data.columns | ||
if x is not None: | ||
if is_integer(x) and not data.columns.holds_integer(): | ||
x = data.columns[x] | ||
x = data_cols[x] | ||
elif not isinstance(data[x], ABCSeries): | ||
raise ValueError("x must be a label or position") | ||
data = data.set_index(x) | ||
|
||
if y is not None: | ||
if is_integer(y) and not data.columns.holds_integer(): | ||
y = data.columns[y] | ||
elif not isinstance(data[y], ABCSeries): | ||
raise ValueError("y must be a label or position") | ||
label = kwds['label'] if 'label' in kwds else y | ||
series = data[y].copy() # Don't modify | ||
series.name = label | ||
int_y = is_integer(y) or all(is_integer(c) for c in y) | ||
if int_y and not data.columns.holds_integer(): | ||
y = data_cols[y] | ||
elif not isinstance(data[y], (ABCSeries, ABCDataFrame)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where is the test for this? (as you expanded to both Series & DataFrame)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good point, removing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you actually selecting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this can probably be removed / replaced? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed, will remove |
||
raise ValueError( | ||
"y must be a label or position or list of them" | ||
) | ||
label_kw = kwds['label'] if 'label' in kwds else False | ||
new_data = data[y].copy() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just call this data |
||
|
||
if isinstance(data[y], ABCSeries): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can just test if y is_scalar There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Duplicate column names may mess that up, not sure if we allow that here though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
right, that's why I have the check for |
||
label_name = label_kw or y | ||
new_data.name = label_name | ||
else: | ||
match = is_list_like(label_kw) and len(label_kw) == len(y) | ||
if label_kw and not match: | ||
raise ValueError( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a test that raises this assertion? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where is this test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
"label should be list-like and same length as y" | ||
) | ||
label_name = label_kw or data[y].columns | ||
new_data.columns = label_name | ||
|
||
for kw in ['xerr', 'yerr']: | ||
if (kw in kwds) and \ | ||
|
@@ -1775,7 +1790,7 @@ def _plot(data, x=None, y=None, subplots=False, | |
kwds[kw] = data[kwds[kw]] | ||
except (IndexError, KeyError, TypeError): | ||
pass | ||
data = series | ||
data = new_data | ||
plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) | ||
|
||
plot_obj.generate() | ||
|
@@ -1788,7 +1803,7 @@ def _plot(data, x=None, y=None, subplots=False, | |
series_kind = "" | ||
|
||
df_coord = """x : label or position, default None | ||
y : label or position, default None | ||
y : label, position or list of label, positions, default None | ||
Allows plotting of one column versus another""" | ||
series_coord = "" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2170,26 +2170,51 @@ def test_invalid_kind(self): | |
with pytest.raises(ValueError): | ||
df.plot(kind='aasdf') | ||
|
||
@pytest.mark.parametrize("x,y", [ | ||
(['B', 'C'], 'A'), | ||
('A', ['B', 'C']) | ||
@pytest.mark.parametrize("x,y,lbl", [ | ||
(['B', 'C'], 'A', 'a'), | ||
(['A'], ['B', 'C'], ['b', 'c']), | ||
('A', ['B', 'C'], 'badlabel') | ||
]) | ||
def test_invalid_xy_args(self, x, y): | ||
# GH 18671 | ||
def test_invalid_xy_args(self, x, y, lbl): | ||
# GH 18671, 19699 allows y to be list-like but not x | ||
df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) | ||
with pytest.raises(ValueError): | ||
df.plot(x=x, y=y) | ||
df.plot(x=x, y=y, label=lbl) | ||
|
||
@pytest.mark.parametrize("x,y", [ | ||
('A', 'B'), | ||
('B', 'A') | ||
(['A'], 'B') | ||
]) | ||
def test_invalid_xy_args_dup_cols(self, x, y): | ||
# GH 18671 | ||
# GH 18671, 19699 allows y to be list-like but not x | ||
df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list('AAB')) | ||
with pytest.raises(ValueError): | ||
df.plot(x=x, y=y) | ||
|
||
@pytest.mark.parametrize("x,y,lbl,colors", [ | ||
('A', ['B'], ['b'], ['red']), | ||
('A', ['B', 'C'], ['b', 'c'], ['red', 'blue']), | ||
(0, [1, 2], ['bokeh', 'cython'], ['green', 'yellow']) | ||
]) | ||
def test_y_listlike(self, x, y, lbl, colors): | ||
# GH 19699 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you give a 1-liner expln |
||
df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) | ||
_check_plot_works(df.plot, x='A', y=y, label=lbl) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you get the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also check the color. What happens? Ideally it's the same as |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add tests for
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Good point. Shouldn't this raise tho? If you try this on a |
||
ax = df.plot(x=x, y=y, label=lbl, color=colors) | ||
assert len(ax.lines) == len(y) | ||
self._check_colors(ax.get_lines(), linecolors=colors) | ||
|
||
@pytest.mark.parametrize("x,y,colnames", [ | ||
(0, 1, ['A', 'B']), | ||
(1, 0, [0, 1]) | ||
]) | ||
def test_xy_args_integer(self, x, y, colnames): | ||
# GH 20056 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
df = DataFrame({"A": [1, 2], 'B': [3, 4]}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we do you think this should be allowed? having both labels and positions is so very confusing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that it's confusing and definitely don't think it should be allowed. Let's remove support for positions? |
||
df.columns = colnames | ||
_check_plot_works(df.plot, x=x, y=y) | ||
|
||
@pytest.mark.slow | ||
def test_hexbin_basic(self): | ||
df = self.hexbin_df | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks fragile. For better or worse (mostly worse) you can pretty much any object in pandas columns, e.g.
df = pd.DataFrame({pd.Timestamp('2017'): [1, 2]})
.I think your code will fail for
y=pd.Timestamp('2017')
, since it's not an integer, but it also isn't iterable.I'd recommend adding
is_list_like(y)
before theall
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good call