From 3e8b0dab00f13d8a95964863d07a239938a25774 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 18 Aug 2022 21:13:59 -0700 Subject: [PATCH 1/4] Catch more test warnings --- pandas/core/interchange/from_dataframe.py | 2 +- pandas/tests/groupby/test_counting.py | 26 +++++++++---------- pandas/tests/groupby/test_function.py | 3 ++- pandas/tests/indexes/multi/test_setops.py | 7 +++-- pandas/tests/io/sas/test_sas7bdat.py | 2 +- .../tests/plotting/frame/test_hist_box_by.py | 18 ++++++++----- 6 files changed, 34 insertions(+), 24 deletions(-) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 6e1b2de10e8e6..4602819b4834a 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -497,7 +497,7 @@ def set_nulls( null_pos = None if null_kind == ColumnNullType.USE_SENTINEL: - null_pos = data == sentinel_val + null_pos = pd.Series(data) == sentinel_val elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK): assert validity, "Expected to have a validity buffer for the mask" valid_buff, valid_dtype = validity diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index f0a3219d0b419..5dda57767a6dc 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -188,21 +188,21 @@ def test_ngroup_cumcount_pair(self): tm.assert_series_equal(g.ngroup(), Series(ngroupd)) tm.assert_series_equal(g.cumcount(), Series(cumcounted)) - def test_ngroup_respects_groupby_order(self): + @pytest.mark.parametrize("sort_flag", [False, True]) + def test_ngroup_respects_groupby_order(self, sort_flag): np.random.seed(0) df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) - for sort_flag in (False, True): - g = df.groupby(["a"], sort=sort_flag) - df["group_id"] = -1 - df["group_index"] = -1 - - for i, (_, group) in enumerate(g): - df.loc[group.index, "group_id"] = i - for j, ind in enumerate(group.index): - df.loc[ind, "group_index"] = j - - tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) - tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) + g = df.groupby("a", sort=sort_flag) + df["group_id"] = -1 + df["group_index"] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, "group_id"] = i + for j, ind in enumerate(group.index): + df.loc[ind, "group_index"] = j + + tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) + tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) @pytest.mark.parametrize( "datetimelike", diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 4b9f5deb40849..7ba22c09cd26d 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1590,10 +1590,11 @@ def test_corrwith_with_1_axis(): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning") def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"]) + gb = df.groupby(["a", "b", "c"], group_keys=False) method = getattr(gb, groupby_func) args = get_groupby_method_args(groupby_func, df) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 39b5e0ffc526c..7940677db0bf3 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -571,5 +571,8 @@ def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names): mi1 = MultiIndex(levels=levels1, codes=codes1, names=names) mi2 = MultiIndex(levels=levels2, codes=codes2, names=names) mi_int = mi1.intersection(mi2) - - assert mi_int.lexsort_depth == 0 + with tm.assert_produces_warning( + FutureWarning, + match="MultiIndex.lexsort_depth is deprecated as a public function", + ): + assert mi_int.lexsort_depth == 0 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 41b2e78d093ea..2b7ecbcdf9f80 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -33,7 +33,7 @@ def data_test_ix(request, dirpath): for k in range(df.shape[1]): col = df.iloc[:, k] if col.dtype == np.int64: - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) + df.isetitem(k, df.iloc[:, k].astype(np.float64)) return df, test_ix diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index e568016c858fd..49711ea7635da 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -83,7 +83,9 @@ class TestHistWithBy(TestPlotBase): ) def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df): # GH 15079 - axes = _check_plot_works(hist_df.plot.hist, column=column, by=by) + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, default_axes=True + ) result_titles = [ax.get_title() for ax in axes] result_legends = [ [legend.get_text() for legend in ax.get_legend().texts] for ax in axes @@ -120,7 +122,7 @@ def test_hist_plot_by_0(self, by, column, titles, legends, hist_df): df = hist_df.copy() df = df.rename(columns={"C": 0}) - axes = _check_plot_works(df.plot.hist, column=column, by=by) + axes = _check_plot_works(df.plot.hist, default_axes=True, column=column, by=by) result_titles = [ax.get_title() for ax in axes] result_legends = [ [legend.get_text() for legend in ax.get_legend().texts] for ax in axes @@ -142,7 +144,9 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): # GH 15079 msg = "No group keys passed" with pytest.raises(ValueError, match=msg): - _check_plot_works(hist_df.plot.hist, column=column, by=by) + _check_plot_works( + hist_df.plot.hist, default_axes=True, column=column, by=by + ) @pytest.mark.slow @pytest.mark.parametrize( @@ -274,7 +278,9 @@ class TestBoxWithBy(TestPlotBase): ) def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): # GH 15079 - axes = _check_plot_works(hist_df.plot.box, column=column, by=by) + axes = _check_plot_works( + hist_df.plot.box, default_axes=True, column=column, by=by + ) result_titles = [ax.get_title() for ax in axes] result_xticklabels = [ [label.get_text() for label in ax.get_xticklabels()] for ax in axes @@ -313,7 +319,7 @@ def test_box_plot_by_0(self, by, column, titles, xticklabels, hist_df): df = hist_df.copy() df = df.rename(columns={"C": 0}) - axes = _check_plot_works(df.plot.box, column=column, by=by) + axes = _check_plot_works(df.plot.box, default_axes=True, column=column, by=by) result_titles = [ax.get_title() for ax in axes] result_xticklabels = [ [label.get_text() for label in ax.get_xticklabels()] for ax in axes @@ -335,7 +341,7 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df): # GH 15079 msg = "No group keys passed" with pytest.raises(ValueError, match=msg): - _check_plot_works(hist_df.plot.box, column=column, by=by) + _check_plot_works(hist_df.plot.box, default_axes=True, column=column, by=by) @pytest.mark.slow @pytest.mark.parametrize( From 05621635ef26a28cbe4f8d5f307157dfecba0e3d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 18 Aug 2022 22:14:33 -0700 Subject: [PATCH 2/4] Fix tests --- pandas/plotting/_matplotlib/core.py | 3 ++- pandas/plotting/_matplotlib/hist.py | 1 - pandas/plotting/_matplotlib/misc.py | 5 ++--- pandas/tests/plotting/frame/test_hist_box_by.py | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7d8c7da6dd9aa..0b6e5b346062a 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -56,6 +56,7 @@ from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by +from pandas.plotting._matplotlib.misc import unpack_single_str_list from pandas.plotting._matplotlib.style import get_standard_colors from pandas.plotting._matplotlib.timeseries import ( decorate_axes, @@ -177,7 +178,7 @@ def __init__( # For `hist` plot, need to get grouped original data before `self.data` is # updated later if self.by is not None and self._kind == "hist": - self._grouped = data.groupby(self.by) + self._grouped = data.groupby(unpack_single_str_list(self.by)) self.kind = kind diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 3ca00ae41d587..d69f68d9e0b66 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -63,7 +63,6 @@ def __init__( MPLPlot.__init__(self, data, **kwargs) def _args_adjust(self): - # calculate bin number separately in different subplots # where subplots are created based on by argument if is_integer(self.bins): diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 4b74b067053a6..633cb63664823 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -479,7 +479,6 @@ def r(h): def unpack_single_str_list(keys): # GH 42795 - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] + if isinstance(keys, list) and len(keys) == 1: + keys = keys[0] return keys diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 49711ea7635da..999118144b58d 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -357,7 +357,9 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df): ) def test_box_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 - axes = _check_plot_works(hist_df.plot.box, column=column, by=by, layout=layout) + axes = _check_plot_works( + hist_df.plot.box, default_axes=True, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize( From 3b8c5e8d5892e98b36184bfbc58dd1449fadaba0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 26 Aug 2022 12:20:52 -0700 Subject: [PATCH 3/4] Use sort fixture --- pandas/tests/groupby/test_counting.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 5dda57767a6dc..7e7f1a628da6e 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -188,11 +188,10 @@ def test_ngroup_cumcount_pair(self): tm.assert_series_equal(g.ngroup(), Series(ngroupd)) tm.assert_series_equal(g.cumcount(), Series(cumcounted)) - @pytest.mark.parametrize("sort_flag", [False, True]) - def test_ngroup_respects_groupby_order(self, sort_flag): + def test_ngroup_respects_groupby_order(self, sort): np.random.seed(0) df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) - g = df.groupby("a", sort=sort_flag) + g = df.groupby("a", sort=sort) df["group_id"] = -1 df["group_index"] = -1 From d4865c5efbcd840b22f9165085e6871be953ae35 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 26 Aug 2022 12:48:14 -0700 Subject: [PATCH 4/4] Use not_indexed_same to avoid warning --- pandas/core/groupby/generic.py | 8 ++++++-- pandas/core/groupby/groupby.py | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cd91e89554b67..7fe1d55ba55be 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1634,7 +1634,9 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmax" - result = self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only) return result @@ -1673,7 +1675,9 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmin" - result = self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only) return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 16ee154156616..89c9f3701a424 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1040,7 +1040,10 @@ def curried(x): return self._obj_with_exclusions result = self._python_apply_general( - curried, self._obj_with_exclusions, is_transform=is_transform + curried, + self._obj_with_exclusions, + is_transform=is_transform, + not_indexed_same=not is_transform, ) if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1: