1.5 dataframe changes (#334)

bashtage · web-flow · commit fccdd1078553 · 2022-09-27T12:19:15.000-04:00
* ENH: Add subplots type

* ENH: Add isetitem

* ENH: Add method to quantile

* ENH: Add result_names to df.compare

* ENH: Add validate to join

* Add allow_duplicates and names to reset_index

* ENH/CLN: Improve df.resample

Add group_keys
Remove 1.1 deprecations of loffset and base

* ENH: Add allow_duplicates to Series

* TST: Add tests for new features
diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
@@ -100,6 +100,7 @@ Axis = Union[str, int]
 IndexLabel = Union[Hashable, Sequence[Hashable]]
 Label = Optional[Hashable]
 Level = Union[Hashable, int]
+Suffixes = tuple[Optional[str], Optional[str]]
 Ordered = Optional[bool]
 JSONSerializable = Union[PythonScalar, list, dict]
 Axes = Union[AnyArrayLike, list, dict, range]
@@ -302,4 +303,6 @@ class StyleExportDict(TypedDict, total=False):
     hide_column_names: bool
     css: dict[str, str | int]
 
+CalculationMethod = Literal["single", "table"]
+
 __all__ = ["npt", "type_t"]
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -56,6 +56,7 @@ from pandas._typing import (
     Axes,
     Axis,
     AxisType,
+    CalculationMethod,
     ColspaceArgType,
     CompressionOptions,
     Dtype,
@@ -91,6 +92,7 @@ from pandas._typing import (
     StataDateFormat,
     StorageOptions,
     StrLike,
+    Suffixes,
     T as TType,
     TimestampConvention,
     WriteBuffer,
@@ -496,6 +498,9 @@ class DataFrame(NDFrame, OpsMixin):
         | np_ndarray_bool
         | Sequence[tuple[Scalar, ...]],
     ) -> DataFrame: ...
+    def isetitem(
+        self, loc: int | Sequence[int], value: Scalar | ArrayLike | list[Any]
+    ) -> None: ...
     def __setitem__(self, key, value): ...
     @overload
     def query(self, expr: _str, *, inplace: Literal[True], **kwargs) -> None: ...
@@ -741,6 +746,8 @@ class DataFrame(NDFrame, OpsMixin):
         col_fill: Hashable = ...,
         *,
         inplace: Literal[True],
+        allow_duplicates: _bool = ...,
+        names: Hashable | list[HashableT] = ...,
     ) -> None: ...
     @overload
     def reset_index(
@@ -751,6 +758,8 @@ class DataFrame(NDFrame, OpsMixin):
         col_fill: Hashable = ...,
         *,
         inplace: Literal[False],
+        allow_duplicates: _bool = ...,
+        names: Hashable | list[HashableT] = ...,
     ) -> DataFrame: ...
     @overload
     def reset_index(
@@ -760,6 +769,8 @@ class DataFrame(NDFrame, OpsMixin):
         *,
         col_level: int | _str = ...,
         col_fill: Hashable = ...,
+        allow_duplicates: _bool = ...,
+        names: Hashable | list[HashableT] = ...,
     ) -> DataFrame: ...
     @overload
     def reset_index(
@@ -769,6 +780,8 @@ class DataFrame(NDFrame, OpsMixin):
         inplace: _bool | None = ...,
         col_level: int | _str = ...,
         col_fill: Hashable = ...,
+        allow_duplicates: _bool = ...,
+        names: Hashable | list[HashableT] = ...,
     ) -> DataFrame | None: ...
     def isna(self) -> DataFrame: ...
     def isnull(self) -> DataFrame: ...
@@ -957,6 +970,7 @@ class DataFrame(NDFrame, OpsMixin):
         align_axis: Axis = ...,
         keep_shape: bool = ...,
         keep_equal: bool = ...,
+        result_names: Suffixes = ...,
     ) -> DataFrame: ...
     def combine(
         self,
@@ -1086,6 +1100,17 @@ class DataFrame(NDFrame, OpsMixin):
         lsuffix: _str = ...,
         rsuffix: _str = ...,
         sort: _bool = ...,
+        validate: Literal[
+            "one_to_one",
+            "1:1",
+            "one_to_many",
+            "1:m",
+            "many_to_one",
+            "m:1",
+            "many_to_many",
+            "m:m",
+        ]
+        | None = ...,
     ) -> DataFrame: ...
     def merge(
         self,
@@ -1163,6 +1188,7 @@ class DataFrame(NDFrame, OpsMixin):
         axis: AxisType = ...,
         numeric_only: _bool = ...,
         interpolation: QuantileInterpolation = ...,
+        method: CalculationMethod = ...,
     ) -> Series: ...
     @overload
     def quantile(
@@ -1171,6 +1197,7 @@ class DataFrame(NDFrame, OpsMixin):
         axis: AxisType = ...,
         numeric_only: _bool = ...,
         interpolation: QuantileInterpolation = ...,
+        method: CalculationMethod = ...,
     ) -> DataFrame: ...
     def to_timestamp(
         self,
@@ -1716,13 +1743,14 @@ class DataFrame(NDFrame, OpsMixin):
         label: _str | None = ...,
         convention: TimestampConvention = ...,
         kind: Literal["timestamp", "period"] | None = ...,
-        loffset=...,
-        base: int = ...,
+        # Not actually positional but needed due to deprecations
+        *,
         on: _str | None = ...,
         level: Level | None = ...,
         origin: Timestamp
         | Literal["epoch", "start", "start_day", "end", "end_day"] = ...,
         offset: Timedelta | _str | None = ...,
+        group_keys: _bool = ...,
     ) -> Resampler[DataFrame]: ...
     def rfloordiv(
         self,
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -281,6 +281,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         *,
         name: object | None = ...,
         inplace: _bool = ...,
+        allow_duplicates: bool = ...,
     ) -> Series[S1]: ...
     @overload
     def reset_index(
@@ -290,6 +291,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         *,
         name: object | None = ...,
         inplace: _bool = ...,
+        allow_duplicates: bool = ...,
     ) -> Series[S1]: ...
     @overload
     def reset_index(
@@ -299,6 +301,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         level: Sequence[Level] | None = ...,
         name: object | None = ...,
         inplace: _bool = ...,
+        allow_duplicates: bool = ...,
     ) -> Series[S1]: ...
     @overload
     def reset_index(
@@ -308,6 +311,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         level: Level | None = ...,
         name: object | None = ...,
         inplace: _bool = ...,
+        allow_duplicates: bool = ...,
     ) -> Series[S1]: ...
     @overload
     def reset_index(
@@ -316,6 +320,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         drop: Literal[False] = ...,
         name: object | None = ...,
         inplace: _bool = ...,
+        allow_duplicates: bool = ...,
     ) -> DataFrame: ...
     @overload
     def reset_index(
@@ -324,6 +329,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
         drop: Literal[False] = ...,
         name: object | None = ...,
         inplace: _bool = ...,
+        allow_duplicates: bool = ...,
     ) -> DataFrame: ...
     @overload
     def to_string(
diff --git a/pandas-stubs/plotting/_core.pyi b/pandas-stubs/plotting/_core.pyi
@@ -2,6 +2,7 @@ from typing import (
     Any,
     Callable,
     Hashable,
+    Iterable,
     Literal,
     NamedTuple,
     Sequence,
@@ -156,7 +157,7 @@ class PlotAccessor(PandasObject):
             "hexbin",
         ] = ...,
         ax: Axes | None = ...,
-        subplots: Literal[True],
+        subplots: Literal[True] | Sequence[Iterable[HashableT]],
         sharex: bool = ...,
         sharey: bool = ...,
         layout: tuple[int, int] = ...,
@@ -199,7 +200,7 @@ class PlotAccessor(PandasObject):
         y: Hashable | Sequence[Hashable] = ...,
         kind: Literal["box"],
         ax: Axes | None = ...,
-        subplots: Literal[True],
+        subplots: Literal[True] | Sequence[Iterable[HashableT]],
         sharex: bool = ...,
         sharey: bool = ...,
         layout: tuple[int, int] = ...,
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -27,6 +27,7 @@
     ensure_clean,
     getSeriesData,
 )
+from pandas.core.resample import Resampler  # noqa: F401
 import pytest
 from typing_extensions import assert_type
 import xarray as xr
@@ -1370,7 +1371,12 @@ def test_join() -> None:
     seriesB = float_frame["B"]
     frameCD = float_frame[["C", "D"]]
     right: list[pd.Series | pd.DataFrame] = [seriesB, frameCD]
-    result = left.join(right)
+    check(assert_type(left.join(right), pd.DataFrame), pd.DataFrame)
+    check(assert_type(left.join(right, validate="1:1"), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(left.join(right, validate="one_to_one"), pd.DataFrame), pd.DataFrame
+    )
+    check(assert_type(left.join(right, validate="1:m"), pd.DataFrame), pd.DataFrame)
 
 
 def test_types_ffill() -> None:
@@ -1816,3 +1822,60 @@ def test_replace_na() -> None:
     # GH 262
     frame = pd.DataFrame(["N/A", "foo", "bar"])
     check(assert_type(frame.replace("N/A", pd.NA), pd.DataFrame), pd.DataFrame)
+
+
+def test_isetframe() -> None:
+    frame = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
+    check(assert_type(frame.isetitem(0, 10), None), type(None))
+    check(assert_type(frame.isetitem([0], [10, 12]), None), type(None))
+
+
+def test_reset_index_150_changes() -> None:
+    frame = pd.DataFrame({"a": [1, 2, 3, 4]}, index=[-10, -9, -8, -7])
+    check(
+        assert_type(
+            frame.reset_index(allow_duplicates=True, names="idx"), pd.DataFrame
+        ),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            frame.reset_index(allow_duplicates=True, names=["idx"]), pd.DataFrame
+        ),
+        pd.DataFrame,
+    )
+
+
+def test_compare_150_changes() -> None:
+    frame_a = pd.DataFrame({"a": [1, 2, 3, 4]}, index=[-10, -9, -8, -7])
+    frame_b = pd.DataFrame({"a": [1, 2, 4, 3]}, index=[-10, -9, -8, -7])
+    check(
+        assert_type(
+            frame_a.compare(frame_b, result_names=("one", "the_other")), pd.DataFrame
+        ),
+        pd.DataFrame,
+    )
+
+
+def test_quantile_150_changes() -> None:
+    frame = pd.DataFrame(getSeriesData())
+    check(assert_type(frame.quantile(0.5, method="single"), pd.Series), pd.Series)
+    check(
+        assert_type(
+            frame.quantile([0.25, 0.5, 0.75], interpolation="nearest", method="table"),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+
+
+def test_resample_150_changes() -> None:
+    idx = pd.date_range("2020-1-1", periods=700)
+    frame = pd.DataFrame(np.random.standard_normal((700, 1)), index=idx, columns=["a"])
+    resampler = frame.resample("M", group_keys=True)
+    assert_type(resampler, "Resampler[pd.DataFrame]")
+
+    def f(s: pd.DataFrame) -> pd.Series:
+        return s.mean()
+
+    check(assert_type(resampler.apply(f), Union[pd.Series, pd.DataFrame]), pd.DataFrame)
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
@@ -573,3 +573,13 @@ def test_plot_keywords(close_figures):
         ),
         plt.Axes,
     )
+
+
+def test_plot_subplot_changes_150() -> None:
+    df = pd.DataFrame(np.random.standard_normal((25, 4)), columns=["a", "b", "c", "d"])
+    check(
+        assert_type(
+            df.plot(subplots=[("a", "b"), ("c", "d")]), npt.NDArray[np.object_]
+        ),
+        np.ndarray,
+    )
diff --git a/tests/test_series.py b/tests/test_series.py
@@ -817,6 +817,8 @@ def test_reset_index() -> None:
     check(assert_type(r4, pd.Series), pd.Series)
     r5 = s.reset_index(["ab"], drop=True)
     check(assert_type(r5, pd.Series), pd.Series)
+    r6 = s.reset_index(["ab"], drop=True, allow_duplicates=True)
+    check(assert_type(r6, pd.Series), pd.Series)
 
 
 def test_series_add_str() -> None: