Skip to content

ENH: categorical scatter plot #34293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jan 27, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d8d8c3c
add legend with colors if coloring by categorical
MarcoGorelli May 21, 2020
a167313
add legend with colors if coloring by categorical
MarcoGorelli May 21, 2020
d10c5c1
add legend with colors if coloring by categorical
MarcoGorelli May 21, 2020
7294aa2
add legend with colors if coloring by categorical
MarcoGorelli May 21, 2020
50cd05f
add legend with colors if coloring by categorical
MarcoGorelli May 21, 2020
4579142
add legend with colors if coloring by categorical
MarcoGorelli May 21, 2020
6d3fe9e
add test
MarcoGorelli May 21, 2020
3846804
revert empty line
MarcoGorelli May 21, 2020
ef4b03d
discrete colorbar in case of ordered categorical
MarcoGorelli May 22, 2020
cf218f7
cleanup
MarcoGorelli May 22, 2020
7aae164
cleanup
MarcoGorelli May 22, 2020
0ab903d
plot colorbar in both cases
MarcoGorelli May 22, 2020
b93ddf1
update test
MarcoGorelli May 22, 2020
5fbc117
update test
MarcoGorelli May 22, 2020
572ecfc
Merge remote-tracking branch 'upstream/master' into categorical-scatter
MarcoGorelli Jan 3, 2021
b2a8b28
:art:
MarcoGorelli Jan 3, 2021
efaaae6
Merge remote-tracking branch 'upstream/master' into categorical-scatter
MarcoGorelli Jan 17, 2021
b0a8cfa
simplify logic
MarcoGorelli Jan 17, 2021
4c15a83
whatsnew entry
MarcoGorelli Jan 17, 2021
b65b103
Merge remote-tracking branch 'upstream/master' into categorical-scatter
MarcoGorelli Jan 18, 2021
6560bb0
Merge remote-tracking branch 'upstream/master' into categorical-scatter
MarcoGorelli Jan 24, 2021
cce3461
add example to visualisation
MarcoGorelli Jan 26, 2021
6e01091
add versionadded tag
MarcoGorelli Jan 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_hashable,
is_integer,
is_iterator,
Expand Down Expand Up @@ -413,7 +414,8 @@ def _compute_plot_data(self):
# np.ndarray before plot.
numeric_data = numeric_data.copy()
for col in numeric_data:
numeric_data[col] = np.asarray(numeric_data[col])
if not is_categorical_dtype(numeric_data[col]):
numeric_data[col] = np.asarray(numeric_data[col])

self.data = numeric_data

Expand Down Expand Up @@ -965,7 +967,10 @@ def _make_plot(self):
elif color is not None:
c_values = color
elif c_is_column:
c_values = self.data[c].values
if not is_categorical_dtype(self.data[c]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: instead of `not is_categorical_dtype`, can you keep this as `is_categorical_dtype` and switch the branches?

c_values = self.data[c].values
else:
c_values = self.data[c].cat.codes
else:
c_values = c

Expand All @@ -983,7 +988,19 @@ def _make_plot(self):
)
if cb:
cbar_label = c if c_is_column else ""
self._plot_colorbar(ax, label=cbar_label)
if not is_categorical_dtype(self.data[c]):
self._plot_colorbar(ax, label=cbar_label)
else:
handles = [
self.plt.scatter(
[],
[],
color=scatter.cmap(scatter.norm(i)),
label=self.data[c].cat.categories[i],
)
for i in self.data[c].cat.codes.unique()
]
ax.legend(handles=handles, title=cbar_label)

if label is not None:
self._add_legend_handle(scatter, label)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/plotting/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1190,6 +1190,16 @@ def test_scatterplot_object_data(self):
_check_plot_works(df.plot.scatter, x="a", y="b")
_check_plot_works(df.plot.scatter, x=0, y=1)

def test_scatterplot_color_by_categorical(self):
    """Check that a scatter plot can be coloured by a categorical column."""
    measurements = [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]]
    species = ["setosa", "setosa", "virginica", "virginica", "versicolor"]

    frame = pd.DataFrame(measurements, columns=["length", "width"])
    frame["species"] = pd.Categorical(species)

    # x and y are passed as integers; the colour column is passed by label.
    _check_plot_works(frame.plot.scatter, x=0, y=1, c="species")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way within the test to check that the legend is drawn and the appropriate colors are applied?


@pytest.mark.slow
def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
# addressing issue #10611, to ensure colobar does not
Expand Down