Skip to content

Commit bac9a1b

Browse files
charlesdong1991jreback
authored andcommitted
ENH: Allow scatter plot to plot objects and datetime type data (pandas-dev#30434)
1 parent 3c8030f commit bac9a1b

File tree

3 files changed

+37
-17
lines changed

3 files changed

+37
-17
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,7 @@ Plotting
908908
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
909909
- :meth:`DataFrame.plot` now allows a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
910910
- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
911+
- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
911912
- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`).
912913

913914
Groupby/resample/rolling

pandas/plotting/_matplotlib/core.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,10 @@ def _compute_plot_data(self):
395395
include_type = [np.number]
396396
exclude_type = ["timedelta"]
397397

398+
# GH 18755, include object and category type for scatter plot
399+
if self._kind == "scatter":
400+
include_type.extend(["object", "category"])
401+
398402
numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type)
399403

400404
try:
@@ -866,10 +870,13 @@ def __init__(self, data, x, y, **kwargs):
866870
x = self.data.columns[x]
867871
if is_integer(y) and not self.data.columns.holds_integer():
868872
y = self.data.columns[y]
869-
if len(self.data[x]._get_numeric_data()) == 0:
870-
raise ValueError(self._kind + " requires x column to be numeric")
871-
if len(self.data[y]._get_numeric_data()) == 0:
872-
raise ValueError(self._kind + " requires y column to be numeric")
873+
874+
# Scatter plots can plot object data, so only hexbin requires numeric columns
875+
if self._kind == "hexbin":
876+
if len(self.data[x]._get_numeric_data()) == 0:
877+
raise ValueError(self._kind + " requires x column to be numeric")
878+
if len(self.data[y]._get_numeric_data()) == 0:
879+
raise ValueError(self._kind + " requires y column to be numeric")
873880

874881
self.x = x
875882
self.y = y

pandas/tests/plotting/test_frame.py

+25-13
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,27 @@ def test_plot_scatter(self):
11621162
axes = df.plot(x="x", y="y", kind="scatter", subplots=True)
11631163
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
11641164

1165+
def test_scatterplot_datetime_data(self):
1166+
# GH 30391
1167+
dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W")
1168+
vals = np.random.normal(0, 1, len(dates))
1169+
df = pd.DataFrame({"dates": dates, "vals": vals})
1170+
1171+
_check_plot_works(df.plot.scatter, x="dates", y="vals")
1172+
_check_plot_works(df.plot.scatter, x=0, y=1)
1173+
1174+
def test_scatterplot_object_data(self):
1175+
# GH 18755
1176+
df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4]))
1177+
1178+
_check_plot_works(df.plot.scatter, x="a", y="b")
1179+
_check_plot_works(df.plot.scatter, x=0, y=1)
1180+
1181+
df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"]))
1182+
1183+
_check_plot_works(df.plot.scatter, x="a", y="b")
1184+
_check_plot_works(df.plot.scatter, x=0, y=1)
1185+
11651186
@pytest.mark.slow
11661187
def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
11671188
# addressing issue #10611, to ensure colorbar does not
@@ -1216,24 +1237,15 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
12161237
colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :]
12171238
assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all()
12181239

1240+
@pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")])
12191241
@pytest.mark.slow
1220-
def test_plot_scatter_with_categorical_data(self):
1221-
# GH 16199
1242+
def test_plot_scatter_with_categorical_data(self, x, y):
1243+
# after fixing GH 18755, should be able to plot categorical data
12221244
df = pd.DataFrame(
12231245
{"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}
12241246
)
12251247

1226-
with pytest.raises(ValueError) as ve:
1227-
df.plot(x="x", y="y", kind="scatter")
1228-
ve.match("requires y column to be numeric")
1229-
1230-
with pytest.raises(ValueError) as ve:
1231-
df.plot(x="y", y="x", kind="scatter")
1232-
ve.match("requires x column to be numeric")
1233-
1234-
with pytest.raises(ValueError) as ve:
1235-
df.plot(x="y", y="y", kind="scatter")
1236-
ve.match("requires x column to be numeric")
1248+
_check_plot_works(df.plot.scatter, x=x, y=y)
12371249

12381250
@pytest.mark.slow
12391251
def test_plot_scatter_with_c(self):

0 commit comments

Comments
 (0)