Skip to content

ENH: Allow scatter plot to plot objects and datetime type data #30434

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 1, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ Plotting
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
- :meth:`DataFrame.plot` now allows a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is more specific yes? it only applies to scatter right

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for taking a look! @jreback indeed, should have been more specific.

Updated.


Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
15 changes: 11 additions & 4 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,10 @@ def _compute_plot_data(self):
include_type = [np.number]
exclude_type = ["timedelta"]

# GH 18755, include object and category type for scatter plot
if self._kind == "scatter":
include_type.extend(["object", "category"])

numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type)

try:
Expand Down Expand Up @@ -872,10 +876,13 @@ def __init__(self, data, x, y, **kwargs):
x = self.data.columns[x]
if is_integer(y) and not self.data.columns.holds_integer():
y = self.data.columns[y]
if len(self.data[x]._get_numeric_data()) == 0:
raise ValueError(self._kind + " requires x column to be numeric")
if len(self.data[y]._get_numeric_data()) == 0:
raise ValueError(self._kind + " requires y column to be numeric")

# Scatter plots can handle object (non-numeric) data, so only hexbin requires numeric x and y columns
if self._kind == "hexbin":
if len(self.data[x]._get_numeric_data()) == 0:
raise ValueError(self._kind + " requires x column to be numeric")
if len(self.data[y]._get_numeric_data()) == 0:
raise ValueError(self._kind + " requires y column to be numeric")

self.x = x
self.y = y
Expand Down
38 changes: 25 additions & 13 deletions pandas/tests/plotting/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,27 @@ def test_plot_scatter(self):
axes = df.plot(x="x", y="y", kind="scatter", subplots=True)
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

def test_scatterplot_datetime_data(self):
    """Check that a scatter plot can be drawn with datetime values on x.

    Regression test for GH 30391.
    """
    # GH 30391
    weekly = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W")
    frame = pd.DataFrame(
        {"dates": weekly, "vals": np.random.normal(0, 1, len(weekly))}
    )

    # Address the columns once by label and once by integer position.
    for x_key, y_key in (("dates", "vals"), (0, 1)):
        _check_plot_works(frame.plot.scatter, x=x_key, y=y_key)

def test_scatterplot_object_data(self):
    """Check that a scatter plot can be drawn from string-valued columns.

    Regression test for GH 18755.
    """
    # GH 18755
    frames = (
        # strings on x, integers on y
        pd.DataFrame({"a": ["A", "B", "C"], "b": [2, 3, 4]}),
        # strings on both axes
        pd.DataFrame({"a": ["A", "B", "C"], "b": ["a", "b", "c"]}),
    )

    for frame in frames:
        # Select columns by label, then by integer position.
        _check_plot_works(frame.plot.scatter, x="a", y="b")
        _check_plot_works(frame.plot.scatter, x=0, y=1)

@pytest.mark.slow
def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
# addressing issue #10611, to ensure colorbar does not
Expand Down Expand Up @@ -1216,24 +1237,15 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :]
assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all()

@pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")])
@pytest.mark.slow
def test_plot_scatter_with_categorical_data(self):
# GH 16199
def test_plot_scatter_with_categorical_data(self, x, y):
# after fixing GH 18755, should be able to plot categorical data
df = pd.DataFrame(
{"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}
)

with pytest.raises(ValueError) as ve:
df.plot(x="x", y="y", kind="scatter")
ve.match("requires y column to be numeric")

with pytest.raises(ValueError) as ve:
df.plot(x="y", y="x", kind="scatter")
ve.match("requires x column to be numeric")

with pytest.raises(ValueError) as ve:
df.plot(x="y", y="y", kind="scatter")
ve.match("requires x column to be numeric")
_check_plot_works(df.plot.scatter, x=x, y=y)

@pytest.mark.slow
def test_plot_scatter_with_c(self):
Expand Down