Skip to content

Commit fccdd10

Browse files
authored
1.5 dataframe changes (#334)
* ENH: Add subplots type * ENH: Add isetitem * ENH: Add method to quantile * ENH: Add result_names to df.compare * ENH: Add validate to join * Add allow_duplicates and names to reset_index * ENH/CLN: Improve df.resample Add group_keys Remove 1.1 deprecations of loffset and base * ENH: Add allow_duplicates to Series * TST: Add tests for new features
1 parent 6d013b4 commit fccdd10

File tree

7 files changed

+118
-5
lines changed

7 files changed

+118
-5
lines changed

pandas-stubs/_typing.pyi

+3
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ Axis = Union[str, int]
100100
IndexLabel = Union[Hashable, Sequence[Hashable]]
101101
Label = Optional[Hashable]
102102
Level = Union[Hashable, int]
103+
Suffixes = tuple[Optional[str], Optional[str]]
103104
Ordered = Optional[bool]
104105
JSONSerializable = Union[PythonScalar, list, dict]
105106
Axes = Union[AnyArrayLike, list, dict, range]
@@ -302,4 +303,6 @@ class StyleExportDict(TypedDict, total=False):
302303
hide_column_names: bool
303304
css: dict[str, str | int]
304305

306+
CalculationMethod = Literal["single", "table"]
307+
305308
__all__ = ["npt", "type_t"]

pandas-stubs/core/frame.pyi

+30-2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ from pandas._typing import (
5656
Axes,
5757
Axis,
5858
AxisType,
59+
CalculationMethod,
5960
ColspaceArgType,
6061
CompressionOptions,
6162
Dtype,
@@ -91,6 +92,7 @@ from pandas._typing import (
9192
StataDateFormat,
9293
StorageOptions,
9394
StrLike,
95+
Suffixes,
9496
T as TType,
9597
TimestampConvention,
9698
WriteBuffer,
@@ -496,6 +498,9 @@ class DataFrame(NDFrame, OpsMixin):
496498
| np_ndarray_bool
497499
| Sequence[tuple[Scalar, ...]],
498500
) -> DataFrame: ...
501+
def isetitem(
502+
self, loc: int | Sequence[int], value: Scalar | ArrayLike | list[Any]
503+
) -> None: ...
499504
def __setitem__(self, key, value): ...
500505
@overload
501506
def query(self, expr: _str, *, inplace: Literal[True], **kwargs) -> None: ...
@@ -741,6 +746,8 @@ class DataFrame(NDFrame, OpsMixin):
741746
col_fill: Hashable = ...,
742747
*,
743748
inplace: Literal[True],
749+
allow_duplicates: _bool = ...,
750+
names: Hashable | list[HashableT] = ...,
744751
) -> None: ...
745752
@overload
746753
def reset_index(
@@ -751,6 +758,8 @@ class DataFrame(NDFrame, OpsMixin):
751758
col_fill: Hashable = ...,
752759
*,
753760
inplace: Literal[False],
761+
allow_duplicates: _bool = ...,
762+
names: Hashable | list[HashableT] = ...,
754763
) -> DataFrame: ...
755764
@overload
756765
def reset_index(
@@ -760,6 +769,8 @@ class DataFrame(NDFrame, OpsMixin):
760769
*,
761770
col_level: int | _str = ...,
762771
col_fill: Hashable = ...,
772+
allow_duplicates: _bool = ...,
773+
names: Hashable | list[HashableT] = ...,
763774
) -> DataFrame: ...
764775
@overload
765776
def reset_index(
@@ -769,6 +780,8 @@ class DataFrame(NDFrame, OpsMixin):
769780
inplace: _bool | None = ...,
770781
col_level: int | _str = ...,
771782
col_fill: Hashable = ...,
783+
allow_duplicates: _bool = ...,
784+
names: Hashable | list[HashableT] = ...,
772785
) -> DataFrame | None: ...
773786
def isna(self) -> DataFrame: ...
774787
def isnull(self) -> DataFrame: ...
@@ -957,6 +970,7 @@ class DataFrame(NDFrame, OpsMixin):
957970
align_axis: Axis = ...,
958971
keep_shape: bool = ...,
959972
keep_equal: bool = ...,
973+
result_names: Suffixes = ...,
960974
) -> DataFrame: ...
961975
def combine(
962976
self,
@@ -1086,6 +1100,17 @@ class DataFrame(NDFrame, OpsMixin):
10861100
lsuffix: _str = ...,
10871101
rsuffix: _str = ...,
10881102
sort: _bool = ...,
1103+
validate: Literal[
1104+
"one_to_one",
1105+
"1:1",
1106+
"one_to_many",
1107+
"1:m",
1108+
"many_to_one",
1109+
"m:1",
1110+
"many_to_many",
1111+
"m:m",
1112+
]
1113+
| None = ...,
10891114
) -> DataFrame: ...
10901115
def merge(
10911116
self,
@@ -1163,6 +1188,7 @@ class DataFrame(NDFrame, OpsMixin):
11631188
axis: AxisType = ...,
11641189
numeric_only: _bool = ...,
11651190
interpolation: QuantileInterpolation = ...,
1191+
method: CalculationMethod = ...,
11661192
) -> Series: ...
11671193
@overload
11681194
def quantile(
@@ -1171,6 +1197,7 @@ class DataFrame(NDFrame, OpsMixin):
11711197
axis: AxisType = ...,
11721198
numeric_only: _bool = ...,
11731199
interpolation: QuantileInterpolation = ...,
1200+
method: CalculationMethod = ...,
11741201
) -> DataFrame: ...
11751202
def to_timestamp(
11761203
self,
@@ -1716,13 +1743,14 @@ class DataFrame(NDFrame, OpsMixin):
17161743
label: _str | None = ...,
17171744
convention: TimestampConvention = ...,
17181745
kind: Literal["timestamp", "period"] | None = ...,
1719-
loffset=...,
1720-
base: int = ...,
1746+
# Not actually positional but needed due to deprecations
1747+
*,
17211748
on: _str | None = ...,
17221749
level: Level | None = ...,
17231750
origin: Timestamp
17241751
| Literal["epoch", "start", "start_day", "end", "end_day"] = ...,
17251752
offset: Timedelta | _str | None = ...,
1753+
group_keys: _bool = ...,
17261754
) -> Resampler[DataFrame]: ...
17271755
def rfloordiv(
17281756
self,

pandas-stubs/core/series.pyi

+6
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
281281
*,
282282
name: object | None = ...,
283283
inplace: _bool = ...,
284+
allow_duplicates: bool = ...,
284285
) -> Series[S1]: ...
285286
@overload
286287
def reset_index(
@@ -290,6 +291,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
290291
*,
291292
name: object | None = ...,
292293
inplace: _bool = ...,
294+
allow_duplicates: bool = ...,
293295
) -> Series[S1]: ...
294296
@overload
295297
def reset_index(
@@ -299,6 +301,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
299301
level: Sequence[Level] | None = ...,
300302
name: object | None = ...,
301303
inplace: _bool = ...,
304+
allow_duplicates: bool = ...,
302305
) -> Series[S1]: ...
303306
@overload
304307
def reset_index(
@@ -308,6 +311,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
308311
level: Level | None = ...,
309312
name: object | None = ...,
310313
inplace: _bool = ...,
314+
allow_duplicates: bool = ...,
311315
) -> Series[S1]: ...
312316
@overload
313317
def reset_index(
@@ -316,6 +320,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
316320
drop: Literal[False] = ...,
317321
name: object | None = ...,
318322
inplace: _bool = ...,
323+
allow_duplicates: bool = ...,
319324
) -> DataFrame: ...
320325
@overload
321326
def reset_index(
@@ -324,6 +329,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
324329
drop: Literal[False] = ...,
325330
name: object | None = ...,
326331
inplace: _bool = ...,
332+
allow_duplicates: bool = ...,
327333
) -> DataFrame: ...
328334
@overload
329335
def to_string(

pandas-stubs/plotting/_core.pyi

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ from typing import (
22
Any,
33
Callable,
44
Hashable,
5+
Iterable,
56
Literal,
67
NamedTuple,
78
Sequence,
@@ -156,7 +157,7 @@ class PlotAccessor(PandasObject):
156157
"hexbin",
157158
] = ...,
158159
ax: Axes | None = ...,
159-
subplots: Literal[True],
160+
subplots: Literal[True] | Sequence[Iterable[HashableT]],
160161
sharex: bool = ...,
161162
sharey: bool = ...,
162163
layout: tuple[int, int] = ...,
@@ -199,7 +200,7 @@ class PlotAccessor(PandasObject):
199200
y: Hashable | Sequence[Hashable] = ...,
200201
kind: Literal["box"],
201202
ax: Axes | None = ...,
202-
subplots: Literal[True],
203+
subplots: Literal[True] | Sequence[Iterable[HashableT]],
203204
sharex: bool = ...,
204205
sharey: bool = ...,
205206
layout: tuple[int, int] = ...,

tests/test_frame.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
ensure_clean,
2828
getSeriesData,
2929
)
30+
from pandas.core.resample import Resampler # noqa: F401
3031
import pytest
3132
from typing_extensions import assert_type
3233
import xarray as xr
@@ -1370,7 +1371,12 @@ def test_join() -> None:
13701371
seriesB = float_frame["B"]
13711372
frameCD = float_frame[["C", "D"]]
13721373
right: list[pd.Series | pd.DataFrame] = [seriesB, frameCD]
1373-
result = left.join(right)
1374+
check(assert_type(left.join(right), pd.DataFrame), pd.DataFrame)
1375+
check(assert_type(left.join(right, validate="1:1"), pd.DataFrame), pd.DataFrame)
1376+
check(
1377+
assert_type(left.join(right, validate="one_to_one"), pd.DataFrame), pd.DataFrame
1378+
)
1379+
check(assert_type(left.join(right, validate="1:m"), pd.DataFrame), pd.DataFrame)
13741380

13751381

13761382
def test_types_ffill() -> None:
@@ -1816,3 +1822,60 @@ def test_replace_na() -> None:
18161822
# GH 262
18171823
frame = pd.DataFrame(["N/A", "foo", "bar"])
18181824
check(assert_type(frame.replace("N/A", pd.NA), pd.DataFrame), pd.DataFrame)
1825+
1826+
1827+
def test_isetframe() -> None:
1828+
frame = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
1829+
check(assert_type(frame.isetitem(0, 10), None), type(None))
1830+
check(assert_type(frame.isetitem([0], [10, 12]), None), type(None))
1831+
1832+
1833+
def test_reset_index_150_changes() -> None:
1834+
frame = pd.DataFrame({"a": [1, 2, 3, 4]}, index=[-10, -9, -8, -7])
1835+
check(
1836+
assert_type(
1837+
frame.reset_index(allow_duplicates=True, names="idx"), pd.DataFrame
1838+
),
1839+
pd.DataFrame,
1840+
)
1841+
check(
1842+
assert_type(
1843+
frame.reset_index(allow_duplicates=True, names=["idx"]), pd.DataFrame
1844+
),
1845+
pd.DataFrame,
1846+
)
1847+
1848+
1849+
def test_compare_150_changes() -> None:
1850+
frame_a = pd.DataFrame({"a": [1, 2, 3, 4]}, index=[-10, -9, -8, -7])
1851+
frame_b = pd.DataFrame({"a": [1, 2, 4, 3]}, index=[-10, -9, -8, -7])
1852+
check(
1853+
assert_type(
1854+
frame_a.compare(frame_b, result_names=("one", "the_other")), pd.DataFrame
1855+
),
1856+
pd.DataFrame,
1857+
)
1858+
1859+
1860+
def test_quantile_150_changes() -> None:
1861+
frame = pd.DataFrame(getSeriesData())
1862+
check(assert_type(frame.quantile(0.5, method="single"), pd.Series), pd.Series)
1863+
check(
1864+
assert_type(
1865+
frame.quantile([0.25, 0.5, 0.75], interpolation="nearest", method="table"),
1866+
pd.DataFrame,
1867+
),
1868+
pd.DataFrame,
1869+
)
1870+
1871+
1872+
def test_resample_150_changes() -> None:
1873+
idx = pd.date_range("2020-1-1", periods=700)
1874+
frame = pd.DataFrame(np.random.standard_normal((700, 1)), index=idx, columns=["a"])
1875+
resampler = frame.resample("M", group_keys=True)
1876+
assert_type(resampler, "Resampler[pd.DataFrame]")
1877+
1878+
def f(s: pd.DataFrame) -> pd.Series:
1879+
return s.mean()
1880+
1881+
check(assert_type(resampler.apply(f), Union[pd.Series, pd.DataFrame]), pd.DataFrame)

tests/test_plotting.py

+10
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,13 @@ def test_plot_keywords(close_figures):
573573
),
574574
plt.Axes,
575575
)
576+
577+
578+
def test_plot_subplot_changes_150() -> None:
579+
df = pd.DataFrame(np.random.standard_normal((25, 4)), columns=["a", "b", "c", "d"])
580+
check(
581+
assert_type(
582+
df.plot(subplots=[("a", "b"), ("c", "d")]), npt.NDArray[np.object_]
583+
),
584+
np.ndarray,
585+
)

tests/test_series.py

+2
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,8 @@ def test_reset_index() -> None:
817817
check(assert_type(r4, pd.Series), pd.Series)
818818
r5 = s.reset_index(["ab"], drop=True)
819819
check(assert_type(r5, pd.Series), pd.Series)
820+
r6 = s.reset_index(["ab"], drop=True, allow_duplicates=True)
821+
check(assert_type(r6, pd.Series), pd.Series)
820822

821823

822824
def test_series_add_str() -> None:

0 commit comments

Comments
 (0)