From 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 Aug 2020 21:19:10 -0700 Subject: [PATCH 1/7] REF: remove unnecesary try/except --- pandas/core/groupby/generic.py | 69 ++++++++++++++++------------------ 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 166631e69f523..51532a75d2d4a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -31,7 +31,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.cast import ( @@ -60,6 +60,7 @@ validate_func_kwargs, ) import pandas.core.algorithms as algorithms +from pandas.core.arrays import ExtensionArray from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -1034,32 +1035,31 @@ def _cython_agg_blocks( no_result = object() - def cast_result_block(result, block: "Block", how: str) -> "Block": - # see if we can cast the block to the desired dtype + def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: + # see if we can cast the values to the desired dtype # this may not be the original dtype assert not isinstance(result, DataFrame) assert result is not no_result - dtype = maybe_cast_result_dtype(block.dtype, how) + dtype = maybe_cast_result_dtype(values.dtype, how) result = maybe_downcast_numeric(result, dtype) - if block.is_extension and isinstance(result, np.ndarray): - # e.g. block.values was an IntegerArray - # (1, N) case can occur if block.values was Categorical + if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray): + # e.g. 
values was an IntegerArray + # (1, N) case can occur if values was Categorical # and result is ndarray[object] # TODO(EA2D): special casing not needed with 2D EAs assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible - result = type(block.values)._from_sequence( - result.ravel(), dtype=block.values.dtype + result = type(values)._from_sequence( + result.ravel(), dtype=values.dtype ) except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) - agg_block: "Block" = block.make_block(result) - return agg_block + return result def blk_func(block: "Block") -> List["Block"]: new_blocks: List["Block"] = [] @@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]: # Categoricals. This will done by later self._reindex_output() # Doing it here creates an error. See GH#34951 sgb = get_groupby(obj, self.grouper, observed=True) - try: - result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - except TypeError: - # we may have an exception in trying to aggregate - # continue and exclude the block - raise + result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) + + result = cast(DataFrame, result) + # unwrap DataFrame to get array + if len(result._mgr.blocks) != 1: + # We've split an object block! Everything we've assumed + # about a single block input returning a single block output + # is a lie. To keep the code-path for the typical non-split case + # clean, we choose to clean up this mess later on. + assert len(locs) == result.shape[1] + for i, loc in enumerate(locs): + agg_block = result.iloc[:, [i]]._mgr.blocks[0] + agg_block.mgr_locs = [loc] + new_blocks.append(agg_block) else: - result = cast(DataFrame, result) - # unwrap DataFrame to get array - if len(result._mgr.blocks) != 1: - # We've split an object block! Everything we've assumed - # about a single block input returning a single block output - # is a lie. 
To keep the code-path for the typical non-split case - # clean, we choose to clean up this mess later on. - assert len(locs) == result.shape[1] - for i, loc in enumerate(locs): - agg_block = result.iloc[:, [i]]._mgr.blocks[0] - agg_block.mgr_locs = [loc] - new_blocks.append(agg_block) - else: - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - agg_block = cast_result_block(result, block, how) - new_blocks = [agg_block] + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + res_values = cast_agg_result(result, block.values, how) + agg_block = block.make_block(res_values) + new_blocks = [agg_block] else: - agg_block = cast_result_block(result, block, how) + res_values = cast_agg_result(result, block.values, how) + agg_block = block.make_block(res_values) new_blocks = [agg_block] return new_blocks From 42649fbb855a895ee5818d7dc80bdbd0ce0e9f5a Mon Sep 17 00:00:00 2001 From: Karthik Mathur <22126205+mathurk1@users.noreply.github.com> Date: Fri, 21 Aug 2020 17:34:51 -0500 Subject: [PATCH 2/7] TST: add test for agg on ordered categorical cols (#35630) --- .../tests/groupby/aggregate/test_aggregate.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ce9d4b892d775..8fe450fe6abfc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1063,6 +1063,85 @@ def test_groupby_get_by_index(): pd.testing.assert_frame_equal(res, expected) +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}), + ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}), + ({"nr": "min"}, {"nr": [1, 5]}), + ], +) +def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): + # test single 
aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = pd.DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + expected_df = pd.DataFrame(data=exp_data, index=cat_index) + + tm.assert_frame_equal(result_df, expected_df) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]), + ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]), + ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), + ], +) +def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): + # test combined aggregations on ordered categorical cols GH27800 + + # create the result dataframe + input_df = pd.DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + # unpack the grp_col_dict to create the multi-index tuple + # this tuple will be used to create the expected dataframe index + multi_index_list = [] + for k, v in grp_col_dict.items(): + if isinstance(v, list): + for value in v: + multi_index_list.append([k, value]) + else: + multi_index_list.append([k, v]) + multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list)) + + expected_df = 
pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index) + + tm.assert_frame_equal(result_df, expected_df) + + def test_nonagg_agg(): # GH 35490 - Single/Multiple agg of non-agg function give same results # TODO: agg should raise for functions that don't aggregate From 47121ddc1c655f428c6c3fcea8fbf02eba85600a Mon Sep 17 00:00:00 2001 From: tkmz-n <60312218+tkmz-n@users.noreply.github.com> Date: Sat, 22 Aug 2020 07:42:50 +0900 Subject: [PATCH 3/7] TST: resample does not yield empty groups (#10603) (#35799) --- pandas/tests/resample/test_timedelta.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 0fbb60c176b30..3fa85e62d028c 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -150,3 +150,18 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): tm.assert_index_equal(result.index, expected_index) assert result.index.freq == expected_index.freq assert not np.isnan(result[-1]) + + +def test_resample_with_timedelta_yields_no_empty_groups(): + # GH 10603 + df = pd.DataFrame( + np.random.normal(size=(10000, 4)), + index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"), + ) + result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x)) + + expected = pd.DataFrame( + [[768.0] * 4] * 12 + [[528.0] * 4], + index=pd.timedelta_range(start="1s", periods=13, freq="3s"), + ) + tm.assert_frame_equal(result, expected) From 1decb3e0ee1923a29b8eded7507bcb783b3870d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Aug 2020 18:48:02 -0700 Subject: [PATCH 4/7] revert accidental rebase --- pandas/core/groupby/generic.py | 61 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4b1f6cfe0a662..60e23b14eaf09 100644 --- a/pandas/core/groupby/generic.py +++ 
b/pandas/core/groupby/generic.py @@ -30,7 +30,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import FrameOrSeries, FrameOrSeriesUnion from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.cast import ( @@ -59,7 +59,6 @@ validate_func_kwargs, ) import pandas.core.algorithms as algorithms -from pandas.core.arrays import ExtensionArray from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -1034,31 +1033,32 @@ def _cython_agg_blocks( no_result = object() - def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: - # see if we can cast the values to the desired dtype + def cast_result_block(result, block: "Block", how: str) -> "Block": + # see if we can cast the block to the desired dtype # this may not be the original dtype assert not isinstance(result, DataFrame) assert result is not no_result - dtype = maybe_cast_result_dtype(values.dtype, how) + dtype = maybe_cast_result_dtype(block.dtype, how) result = maybe_downcast_numeric(result, dtype) - if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray): - # e.g. values was an IntegerArray - # (1, N) case can occur if values was Categorical + if block.is_extension and isinstance(result, np.ndarray): + # e.g. 
block.values was an IntegerArray + # (1, N) case can occur if block.values was Categorical # and result is ndarray[object] # TODO(EA2D): special casing not needed with 2D EAs assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible - result = type(values)._from_sequence( - result.ravel(), dtype=values.dtype + result = type(block.values)._from_sequence( + result.ravel(), dtype=block.values.dtype ) except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) - return result + agg_block: "Block" = block.make_block(result) + return agg_block def blk_func(block: "Block") -> List["Block"]: new_blocks: List["Block"] = [] @@ -1092,25 +1092,28 @@ def blk_func(block: "Block") -> List["Block"]: # Categoricals. This will done by later self._reindex_output() # Doing it here creates an error. See GH#34951 sgb = get_groupby(obj, self.grouper, observed=True) - result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - - assert isinstance(result, (Series, DataFrame)) # for mypy - # In the case of object dtype block, it may have been split - # in the operation. We un-split here. - result = result._consolidate() - assert isinstance(result, (Series, DataFrame)) # for mypy - assert len(result._mgr.blocks) == 1 - - # unwrap DataFrame to get array - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - res_values = cast_agg_result(result, block.values, how) - agg_block = block.make_block(res_values) - new_blocks = [agg_block] + try: + result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) + except TypeError: + # we may have an exception in trying to aggregate + # continue and exclude the block + raise + else: + assert isinstance(result, (Series, DataFrame)) # for mypy + # In the case of object dtype block, it may have been split + # in the operation. We un-split here. 
+ result = result._consolidate() + assert isinstance(result, (Series, DataFrame)) # for mypy + assert len(result._mgr.blocks) == 1 + + # unwrap DataFrame to get array + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + agg_block = cast_result_block(result, block, how) + new_blocks = [agg_block] else: - res_values = cast_agg_result(result, block.values, how) - agg_block = block.make_block(res_values) + agg_block = cast_result_block(result, block, how) new_blocks = [agg_block] return new_blocks From 60ca29f83909b873129cac957153ad6fc465d2de Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Aug 2020 20:18:19 -0700 Subject: [PATCH 5/7] de-privatize --- pandas/plotting/_matplotlib/boxplot.py | 16 +++++++-------- pandas/plotting/_matplotlib/core.py | 24 +++++++++++----------- pandas/plotting/_matplotlib/hist.py | 20 ++++++++++-------- pandas/plotting/_matplotlib/misc.py | 14 ++++++------- pandas/plotting/_matplotlib/style.py | 2 +- pandas/plotting/_matplotlib/tools.py | 20 +++++++++--------- pandas/tests/plotting/common.py | 9 +++++---- pandas/tests/plotting/test_misc.py | 26 ++++++++++++------------ pandas/tests/plotting/test_series.py | 28 +++++++++++++------------- 9 files changed, 82 insertions(+), 77 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index b33daf39de37c..e79d59f2a18b8 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -11,8 +11,8 @@ from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import LinePlot, MPLPlot -from pandas.plotting._matplotlib.style import _get_standard_colors -from pandas.plotting._matplotlib.tools import _flatten, _subplots +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.tools import create_subplots, flatten_axes class BoxPlot(LinePlot): @@ -80,7 +80,7 @@ def 
_validate_color_args(self): self.color = None # get standard colors for default - colors = _get_standard_colors(num_colors=3, colormap=self.colormap, color=None) + colors = get_standard_colors(num_colors=3, colormap=self.colormap, color=None) # use 2 colors by default, for box/whisker and median # flier colors isn't needed here # because it can be specified by ``sym`` kw @@ -196,11 +196,11 @@ def _grouped_plot_by_column( by = [by] columns = data._get_numeric_data().columns.difference(by) naxes = len(columns) - fig, axes = _subplots( + fig, axes = create_subplots( naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout ) - _axes = _flatten(axes) + _axes = flatten_axes(axes) ax_values = [] @@ -255,7 +255,7 @@ def _get_colors(): # num_colors=3 is required as method maybe_color_bp takes the colors # in positions 0 and 2. # if colors not provided, use same defaults as DataFrame.plot.box - result = _get_standard_colors(num_colors=3) + result = get_standard_colors(num_colors=3) result = np.take(result, [0, 0, 2]) result = np.append(result, "k") @@ -410,7 +410,7 @@ def boxplot_frame_groupby( ): if subplots is True: naxes = len(grouped) - fig, axes = _subplots( + fig, axes = create_subplots( naxes=naxes, squeeze=False, ax=ax, @@ -419,7 +419,7 @@ def boxplot_frame_groupby( figsize=figsize, layout=layout, ) - axes = _flatten(axes) + axes = flatten_axes(axes) ret = pd.Series(dtype=object) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 4d23a5e5fc249..f1dd829b561be 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -32,14 +32,14 @@ from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters -from pandas.plotting._matplotlib.style import _get_standard_colors +from pandas.plotting._matplotlib.style import get_standard_colors from 
pandas.plotting._matplotlib.tools import ( - _flatten, - _get_all_lines, - _get_xlim, - _handle_shared_axes, - _subplots, + create_subplots, + flatten_axes, format_date_labels, + get_all_lines, + get_xlim, + handle_shared_axes, table, ) @@ -317,7 +317,7 @@ def _maybe_right_yaxis(self, ax, axes_num): def _setup_subplots(self): if self.subplots: - fig, axes = _subplots( + fig, axes = create_subplots( naxes=self.nseries, sharex=self.sharex, sharey=self.sharey, @@ -336,7 +336,7 @@ def _setup_subplots(self): fig.set_size_inches(self.figsize) axes = self.ax - axes = _flatten(axes) + axes = flatten_axes(axes) valid_log = {False, True, "sym", None} input_log = {self.logx, self.logy, self.loglog} @@ -468,7 +468,7 @@ def _adorn_subplots(self): if len(self.axes) > 0: all_axes = self._get_subplots() nrows, ncols = self._get_axes_layout() - _handle_shared_axes( + handle_shared_axes( axarr=all_axes, nplots=len(all_axes), naxes=nrows * ncols, @@ -755,7 +755,7 @@ def _get_colors(self, num_colors=None, color_kwds="color"): if num_colors is None: num_colors = self.nseries - return _get_standard_colors( + return get_standard_colors( num_colors=num_colors, colormap=self.colormap, color=self.kwds.get(color_kwds), @@ -1134,8 +1134,8 @@ def _make_plot(self): # reset of xlim should be used for ts data # TODO: GH28021, should find a way to change view limit on xaxis - lines = _get_all_lines(ax) - left, right = _get_xlim(lines) + lines = get_all_lines(ax) + left, right = get_xlim(lines) ax.set_xlim(left, right) @classmethod diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index ee41479b3c7c9..3f5a3cd287c3e 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -6,7 +6,11 @@ from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import LinePlot, MPLPlot -from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots +from pandas.plotting._matplotlib.tools 
import ( + create_subplots, + flatten_axes, + set_ticks_props, +) class HistPlot(LinePlot): @@ -193,11 +197,11 @@ def _grouped_plot( grouped = grouped[column] naxes = len(grouped) - fig, axes = _subplots( + fig, axes = create_subplots( naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout ) - _axes = _flatten(axes) + _axes = flatten_axes(axes) for i, (key, group) in enumerate(grouped): ax = _axes[i] @@ -281,7 +285,7 @@ def plot_group(group, ax): rot=rot, ) - _set_ticks_props( + set_ticks_props( axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot ) @@ -332,7 +336,7 @@ def hist_series( ax.grid(grid) axes = np.array([ax]) - _set_ticks_props( + set_ticks_props( axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot ) @@ -414,7 +418,7 @@ def hist_frame( if naxes == 0: raise ValueError("hist method requires numerical columns, nothing to plot.") - fig, axes = _subplots( + fig, axes = create_subplots( naxes=naxes, ax=ax, squeeze=False, @@ -423,7 +427,7 @@ def hist_frame( figsize=figsize, layout=layout, ) - _axes = _flatten(axes) + _axes = flatten_axes(axes) can_set_label = "label" not in kwds @@ -437,7 +441,7 @@ def hist_frame( if legend: ax.legend() - _set_ticks_props( + set_ticks_props( axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot ) fig.subplots_adjust(wspace=0.3, hspace=0.3) diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index bb6530b0f6412..ff3501d20092d 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -7,8 +7,8 @@ from pandas.core.dtypes.missing import notna from pandas.io.formats.printing import pprint_thing -from pandas.plotting._matplotlib.style import _get_standard_colors -from pandas.plotting._matplotlib.tools import _set_ticks_props, _subplots +from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.tools import create_subplots, 
set_ticks_props def scatter_matrix( @@ -27,7 +27,7 @@ def scatter_matrix( df = frame._get_numeric_data() n = df.columns.size naxes = n * n - fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) + fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) @@ -103,7 +103,7 @@ def scatter_matrix( locs = locs.astype(int) axes[0][0].yaxis.set_ticklabels(locs) - _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) return axes @@ -131,7 +131,7 @@ def normalize(series): ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) to_plot = {} - colors = _get_standard_colors( + colors = get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color ) @@ -233,7 +233,7 @@ def f(t): t = np.linspace(-np.pi, np.pi, samples) used_legends = set() - color_values = _get_standard_colors( + color_values = get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color ) colors = dict(zip(classes, color_values)) @@ -354,7 +354,7 @@ def parallel_coordinates( if ax is None: ax = plt.gca() - color_values = _get_standard_colors( + color_values = get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color ) diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index 7990bff4f517c..54a2ed59d55c9 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -10,7 +10,7 @@ import pandas.core.common as com -def _get_standard_colors( +def get_standard_colors( num_colors=None, colormap=None, color_type="default", color=None ): import matplotlib.pyplot as plt diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 4d643ffb734e4..98aaab6838fba 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ 
b/pandas/plotting/_matplotlib/tools.py @@ -100,7 +100,7 @@ def _get_layout(nplots: int, layout=None, layout_type: str = "box") -> Tuple[int # copied from matplotlib/pyplot.py and modified for pandas.plotting -def _subplots( +def create_subplots( naxes: int, sharex: bool = False, sharey: bool = False, @@ -194,7 +194,7 @@ def _subplots( fig = plt.figure(**fig_kw) else: if is_list_like(ax): - ax = _flatten(ax) + ax = flatten_axes(ax) if layout is not None: warnings.warn( "When passing multiple axes, layout keyword is ignored", UserWarning @@ -221,7 +221,7 @@ def _subplots( if squeeze: return fig, ax else: - return fig, _flatten(ax) + return fig, flatten_axes(ax) else: warnings.warn( "To output multiple subplots, the figure containing " @@ -264,7 +264,7 @@ def _subplots( for ax in axarr[naxes:]: ax.set_visible(False) - _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) if squeeze: # Reshape the array to have the final desired dimension (nrow,ncol), @@ -297,7 +297,7 @@ def _remove_labels_from_axis(axis: "Axis"): axis.get_label().set_visible(False) -def _handle_shared_axes( +def handle_shared_axes( axarr: Iterable["Axes"], nplots: int, naxes: int, @@ -351,7 +351,7 @@ def _handle_shared_axes( _remove_labels_from_axis(ax.yaxis) -def _flatten(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]: +def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]: if not is_list_like(axes): return np.array([axes]) elif isinstance(axes, (np.ndarray, ABCIndexClass)): @@ -359,7 +359,7 @@ def _flatten(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]: return np.array(axes) -def _set_ticks_props( +def set_ticks_props( axes: Union["Axes", Sequence["Axes"]], xlabelsize=None, xrot=None, @@ -368,7 +368,7 @@ def _set_ticks_props( ): import matplotlib.pyplot as plt - for ax in _flatten(axes): + for ax in flatten_axes(axes): if xlabelsize is not None: 
plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) if xrot is not None: @@ -380,7 +380,7 @@ def _set_ticks_props( return axes -def _get_all_lines(ax: "Axes") -> List["Line2D"]: +def get_all_lines(ax: "Axes") -> List["Line2D"]: lines = ax.get_lines() if hasattr(ax, "right_ax"): @@ -392,7 +392,7 @@ def _get_all_lines(ax: "Axes") -> List["Line2D"]: return lines -def _get_xlim(lines: Iterable["Line2D"]) -> Tuple[float, float]: +def get_xlim(lines: Iterable["Line2D"]) -> Tuple[float, float]: left, right = np.inf, -np.inf for l in lines: x = l.get_xdata(orig=False) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 3b1ff233c5ec1..34cfb48c58c77 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -13,6 +13,7 @@ from pandas import DataFrame, Series import pandas._testing as tm + """ This is a common base class used for various plotting tests """ @@ -330,7 +331,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None): figsize : tuple expected figsize. 
default is matplotlib default """ - from pandas.plotting._matplotlib.tools import _flatten + from pandas.plotting._matplotlib.tools import flatten_axes if figsize is None: figsize = self.default_figsize @@ -343,7 +344,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None): assert len(ax.get_children()) > 0 if layout is not None: - result = self._get_axes_layout(_flatten(axes)) + result = self._get_axes_layout(flatten_axes(axes)) assert result == layout tm.assert_numpy_array_equal( @@ -370,9 +371,9 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ - from pandas.plotting._matplotlib.tools import _flatten + from pandas.plotting._matplotlib.tools import flatten_axes - axes = _flatten(axes) + axes = flatten_axes(axes) axes = [ax for ax in axes if ax.get_visible()] return axes diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index f5c1c58f3f7ed..e7184271aa9dd 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -349,11 +349,11 @@ def test_subplot_titles(self, iris): title_list = [ax.get_title() for sublist in plot for ax in sublist] assert title_list == title[:3] + [""] - def test_get_standard_colors_random_seed(self): + def testget_standard_colors_random_seed(self): # GH17525 df = DataFrame(np.zeros((10, 10))) - # Make sure that the random seed isn't reset by _get_standard_colors + # Make sure that the random seed isn't reset by get_standard_colors plotting.parallel_coordinates(df, 0) rand1 = random.random() plotting.parallel_coordinates(df, 0) @@ -361,19 +361,19 @@ def test_get_standard_colors_random_seed(self): assert rand1 != rand2 # Make sure it produces the same colors every time it's called - from pandas.plotting._matplotlib.style import _get_standard_colors + from pandas.plotting._matplotlib.style import get_standard_colors - color1 = _get_standard_colors(1, color_type="random") - color2 = _get_standard_colors(1, 
color_type="random") + color1 = get_standard_colors(1, color_type="random") + color2 = get_standard_colors(1, color_type="random") assert color1 == color2 - def test_get_standard_colors_default_num_colors(self): - from pandas.plotting._matplotlib.style import _get_standard_colors + def testget_standard_colors_default_num_colors(self): + from pandas.plotting._matplotlib.style import get_standard_colors # Make sure the default color_types returns the specified amount - color1 = _get_standard_colors(1, color_type="default") - color2 = _get_standard_colors(9, color_type="default") - color3 = _get_standard_colors(20, color_type="default") + color1 = get_standard_colors(1, color_type="default") + color2 = get_standard_colors(9, color_type="default") + color3 = get_standard_colors(20, color_type="default") assert len(color1) == 1 assert len(color2) == 9 assert len(color3) == 20 @@ -394,17 +394,17 @@ def test_plot_single_color(self): colors = [rect.get_facecolor() for rect in ax.get_children()[0:3]] assert all(color == colors[0] for color in colors) - def test_get_standard_colors_no_appending(self): + def testget_standard_colors_no_appending(self): # GH20726 # Make sure not to add more colors so that matplotlib can cycle # correctly. 
from matplotlib import cm - from pandas.plotting._matplotlib.style import _get_standard_colors + from pandas.plotting._matplotlib.style import get_standard_colors color_before = cm.gnuplot(range(5)) - color_after = _get_standard_colors(1, color=color_before) + color_after = get_standard_colors(1, color=color_before) assert len(color_after) == len(color_before) df = DataFrame(np.random.randn(48, 4), columns=list("ABCD")) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index cc00626e992f3..c296e2a6278c5 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -809,53 +809,53 @@ def test_series_grid_settings(self): @pytest.mark.slow def test_standard_colors(self): - from pandas.plotting._matplotlib.style import _get_standard_colors + from pandas.plotting._matplotlib.style import get_standard_colors for c in ["r", "red", "green", "#FF0000"]: - result = _get_standard_colors(1, color=c) + result = get_standard_colors(1, color=c) assert result == [c] - result = _get_standard_colors(1, color=[c]) + result = get_standard_colors(1, color=[c]) assert result == [c] - result = _get_standard_colors(3, color=c) + result = get_standard_colors(3, color=c) assert result == [c] * 3 - result = _get_standard_colors(3, color=[c]) + result = get_standard_colors(3, color=[c]) assert result == [c] * 3 @pytest.mark.slow def test_standard_colors_all(self): import matplotlib.colors as colors - from pandas.plotting._matplotlib.style import _get_standard_colors + from pandas.plotting._matplotlib.style import get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: - result = _get_standard_colors(num_colors=1, color=c) + result = get_standard_colors(num_colors=1, color=c) assert result == [c] - result = _get_standard_colors(num_colors=1, color=[c]) + result = get_standard_colors(num_colors=1, color=[c]) assert result == [c] - result = _get_standard_colors(num_colors=3, color=c) + result = 
get_standard_colors(num_colors=3, color=c) assert result == [c] * 3 - result = _get_standard_colors(num_colors=3, color=[c]) + result = get_standard_colors(num_colors=3, color=[c]) assert result == [c] * 3 # single letter colors like k for c in colors.ColorConverter.colors: - result = _get_standard_colors(num_colors=1, color=c) + result = get_standard_colors(num_colors=1, color=c) assert result == [c] - result = _get_standard_colors(num_colors=1, color=[c]) + result = get_standard_colors(num_colors=1, color=[c]) assert result == [c] - result = _get_standard_colors(num_colors=3, color=c) + result = get_standard_colors(num_colors=3, color=c) assert result == [c] * 3 - result = _get_standard_colors(num_colors=3, color=[c]) + result = get_standard_colors(num_colors=3, color=[c]) assert result == [c] * 3 def test_series_plot_color_kwargs(self): From 2a1eda492c932f935bbb134fa79ebb5b920867cd Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Sep 2020 13:04:04 -0700 Subject: [PATCH 6/7] docstring inside class, isort fixup --- pandas/tests/plotting/common.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 34cfb48c58c77..b753c96af6290 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -14,13 +14,12 @@ import pandas._testing as tm -""" -This is a common base class used for various plotting tests -""" - - @td.skip_if_no_mpl class TestPlotBase: + """ + This is a common base class used for various plotting tests + """ + def setup_method(self, method): import matplotlib as mpl From 3270e2bc174c58e3f6cdfe4b607c09b394df3f98 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Sep 2020 18:04:44 -0700 Subject: [PATCH 7/7] typo fixup --- pandas/tests/plotting/test_misc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index e7184271aa9dd..130acaa8bcd58 100644 
--- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -349,7 +349,7 @@ def test_subplot_titles(self, iris): title_list = [ax.get_title() for sublist in plot for ax in sublist] assert title_list == title[:3] + [""] - def testget_standard_colors_random_seed(self): + def test_get_standard_colors_random_seed(self): # GH17525 df = DataFrame(np.zeros((10, 10))) @@ -367,7 +367,7 @@ def testget_standard_colors_random_seed(self): color2 = get_standard_colors(1, color_type="random") assert color1 == color2 - def testget_standard_colors_default_num_colors(self): + def test_get_standard_colors_default_num_colors(self): from pandas.plotting._matplotlib.style import get_standard_colors # Make sure the default color_types returns the specified amount @@ -394,7 +394,7 @@ def test_plot_single_color(self): colors = [rect.get_facecolor() for rect in ax.get_children()[0:3]] assert all(color == colors[0] for color in colors) - def testget_standard_colors_no_appending(self): + def test_get_standard_colors_no_appending(self): # GH20726 # Make sure not to add more colors so that matplotlib can cycle