Skip to content

BUG: Fixes plotting with nullable integers (#32073) #38014

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ Plotting
indexed by a :class:`.TimedeltaIndex` with a fixed frequency and the x-axis lower limit was greater than the upper limit (:issue:`37454`)
- Bug in :meth:`.DataFrameGroupBy.boxplot` when ``subplots=False`` would raise a ``KeyError`` (:issue:`16748`)
- Bug in :meth:`DataFrame.plot` and :meth:`Series.plot` was overwriting matplotlib's shared y axes behaviour when no ``sharey`` parameter was passed (:issue:`37942`)
- Bug in :meth:`DataFrame.plot` was raising a ``TypeError`` with ``ExtensionDtype`` columns (:issue:`32073`)


Groupby/resample/rolling
Expand Down
25 changes: 18 additions & 7 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_extension_array_dtype,
is_float,
is_float_dtype,
is_hashable,
is_integer,
is_integer_dtype,
is_iterator,
is_list_like,
is_number,
Expand Down Expand Up @@ -383,6 +386,20 @@ def result(self):
else:
return self.axes[0]

def _convert_to_ndarray(self, data):
# GH32073: cast to float if values contain nulled integers
if (
is_integer_dtype(data.dtype) or is_float_dtype(data.dtype)
) and is_extension_array_dtype(data.dtype):
return data.to_numpy(dtype="float", na_value=np.nan)

# GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to
# np.ndarray before plot.
if len(data) > 0:
return np.asarray(data)

return data

def _compute_plot_data(self):
data = self.data

Expand Down Expand Up @@ -423,13 +440,7 @@ def _compute_plot_data(self):
if is_empty:
raise TypeError("no numeric data to plot")

# GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to
# np.ndarray before plot.
numeric_data = numeric_data.copy()
for col in numeric_data:
numeric_data[col] = np.asarray(numeric_data[col])

self.data = numeric_data
self.data = numeric_data.apply(self._convert_to_ndarray)

def _make_plot(self):
raise AbstractMethodError(self)
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/plotting/frame/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,28 @@ def test_plot(self):
result = ax.axes
assert result is axes[0]

def test_nullable_int_plot(self):
    """Smoke-test ``DataFrame.plot`` on a frame that includes missing values."""
    # GH 32073
    year_strings = ["2008", "2009", None, "2011", "2012"]
    columns = {
        "A": [1, 2, 3, 4, 5],
        "B": [1.0, 2.0, 3.0, 4.0, 5.0],
        "C": [7, 5, np.nan, 3, 2],
        "D": pd.to_datetime(year_strings, format="%Y"),
        "E": pd.to_datetime(year_strings, format="%Y", utc=True),
    }
    df = DataFrame(columns, dtype=np.int64)

    # Full frame first, then two-column sub-frames selected as [x, y].
    _check_plot_works(df.plot, x="A", y="B")

    axis_pairs = [
        ("A", "B"),
        ("C", "A"),  # nullable value on x-axis
        ("A", "C"),
        ("B", "C"),
        ("A", "D"),
        ("A", "E"),
    ]
    for x_col, y_col in axis_pairs:
        _check_plot_works(df[[x_col, y_col]].plot, x=x_col, y=y_col)

def test_integer_array_plot(self):
# GH 25587
arr = integer_array([1, 2, 3, 4], dtype="UInt32")
Expand Down