pandas-dev · Dr-Irv · Sep 30, 2022 · Sep 27, 2022 · Sep 28, 2022 · Sep 28, 2022
diff --git a/pandas-stubs/__init__.pyi b/pandas-stubs/__init__.pyi
@@ -17,6 +17,7 @@ from ._config import (
 )
 from .core.api import (
     NA as NA,
+    ArrowDtype as ArrowDtype,
     BooleanDtype as BooleanDtype,
     Categorical as Categorical,
     CategoricalDtype as CategoricalDtype,

diff --git a/pandas-stubs/_libs/tslibs/offsets.pyi b/pandas-stubs/_libs/tslibs/offsets.pyi
@@ -225,7 +225,33 @@ class CustomBusinessHour(BusinessHour):
 
 class CustomBusinessMonthEnd(_CustomBusinessMonth): ...
 class CustomBusinessMonthBegin(_CustomBusinessMonth): ...
-class DateOffset(RelativeDeltaOffset): ...
+
+class DateOffset(RelativeDeltaOffset):
+    def __init__(
+        self,
+        *,  # every parameter below is keyword-only
+        n: int = ...,
+        normalize: bool = ...,
+        years: int = ...,
+        months: int = ...,
+        weeks: int = ...,
+        days: int = ...,
+        hours: int = ...,
+        minutes: int = ...,
+        seconds: int = ...,
+        milliseconds: int = ...,
+        microseconds: int = ...,
+        nanoseconds: int = ...,
+        year: int = ...,
+        month: int = ...,
+        day: int = ...,
+        weekday: int = ...,
+        hour: int = ...,
+        minute: int = ...,
+        second: int = ...,
+        microsecond: int = ...,
+        nanosecond: int = ...,
+    ) -> None: ...
 
 BDay = BusinessDay
 BMonthEnd = BusinessMonthEnd

diff --git a/pandas-stubs/_testing/__init__.pyi b/pandas-stubs/_testing/__init__.pyi
@@ -3,6 +3,7 @@ from typing import (
     Any,
     Generator,
     Literal,
+    overload,
 )
 
 from pandas import (
@@ -54,6 +55,7 @@ def assert_extension_array_equal(
     check_less_precise: bool = ...,
     check_exact: bool = ...,
 ) -> None: ...
+@overload
 def assert_series_equal(
     left: Series,
     right: Series,
@@ -71,7 +73,29 @@ def assert_series_equal(
     atol: float = ...,
     obj: str = ...,
     *,
-    check_index: bool = ...,
+    check_index: Literal[False],
+    check_like: Literal[False],
+) -> None: ...
+@overload
+def assert_series_equal(
+    left: Series,
+    right: Series,
+    check_dtype: bool = ...,
+    check_index_type: bool | str = ...,
+    check_series_type: bool = ...,
+    check_names: bool = ...,
+    check_exact: bool = ...,
+    check_datetimelike_compat: bool = ...,
+    check_categorical: bool = ...,
+    check_category_order: bool = ...,
+    check_freq: bool = ...,
+    check_flags: bool = ...,
+    rtol: float = ...,
+    atol: float = ...,
+    obj: str = ...,
+    *,
+    check_index: Literal[True] = ...,
+    check_like: bool = ...,
 ) -> None: ...
 def assert_frame_equal(
     left: DataFrame,

diff --git a/pandas-stubs/core/api.pyi b/pandas-stubs/core/api.pyi
@@ -4,6 +4,7 @@ from pandas.core.algorithms import (
     value_counts as value_counts,
 )
 from pandas.core.arrays import Categorical as Categorical
+from pandas.core.arrays.arrow.dtype import ArrowDtype as ArrowDtype
 from pandas.core.arrays.boolean import BooleanDtype as BooleanDtype
 from pandas.core.arrays.floating import (
     Float32Dtype as Float32Dtype,

diff --git a/pandas-stubs/core/arrays/arrow/dtype.pyi b/pandas-stubs/core/arrays/arrow/dtype.pyi
@@ -0,0 +1,8 @@
+import numpy as np
+import pyarrow as pa
+
+from pandas.core.dtypes.base import StorageExtensionDtype
+
+class ArrowDtype(StorageExtensionDtype):
+    pyarrow_dtype: pa.DataType
+    def __init__(self, pyarrow_dtype: pa.DataType) -> None: ...
diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi
@@ -22,3 +22,5 @@ class ExtensionDtype:
     def construct_from_string(cls, string: str): ...
     @classmethod
     def is_dtype(cls, dtype) -> bool: ...
+
+class StorageExtensionDtype(ExtensionDtype): ...
diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi
@@ -28,6 +28,7 @@ from pandas._typing import (
     Dtype,
     DtypeArg,
     DtypeObj,
+    HashableT,
     IndexT,
     Label,
     Level,
@@ -148,7 +149,7 @@ class Index(IndexOpsMixin, PandasObject):
     def __neg__(self: IndexT) -> IndexT: ...
     def __nonzero__(self) -> None: ...
     __bool__ = ...
-    def union(self, other: list[T1] | Index, sort=...) -> Index: ...
+    def union(self, other: list[HashableT] | Index, sort=...) -> Index: ...
     def intersection(self, other: list[T1] | Index, sort: bool = ...) -> Index: ...
     def difference(self, other: list | Index) -> Index: ...
     def symmetric_difference(

diff --git a/pandas-stubs/core/indexes/multi.pyi b/pandas-stubs/core/indexes/multi.pyi
@@ -6,11 +6,13 @@ from typing import (
 )
 
 import numpy as np
+import pandas as pd
 from pandas.core.indexes.base import Index
 
 from pandas._typing import (
     T1,
     DtypeArg,
+    HashableT,
     np_ndarray_bool,
 )
 
@@ -88,7 +90,12 @@ class MultiIndex(Index):
     def get_value(self, series, key): ...
     def get_level_values(self, level: str | int) -> Index: ...
     def unique(self, level=...): ...
-    def to_frame(self, index: bool = ..., name=...): ...
+    def to_frame(
+        self,
+        index: bool = ...,
+        name: list[HashableT] = ...,
+        allow_duplicates: bool = ...,
+    ) -> pd.DataFrame: ...
     def to_flat_index(self): ...
     @property
     def is_all_dates(self) -> bool: ...

diff --git a/pandas-stubs/core/indexes/range.pyi b/pandas-stubs/core/indexes/range.pyi
@@ -1,7 +1,11 @@
 import numpy as np
+from pandas.core.indexes.base import Index
 from pandas.core.indexes.numeric import Int64Index
 
-from pandas._typing import npt
+from pandas._typing import (
+    HashableT,
+    npt,
+)
 
 class RangeIndex(Int64Index):
     def __new__(
@@ -70,3 +74,6 @@ class RangeIndex(Int64Index):
     def __floordiv__(self, other): ...
     def all(self) -> bool: ...
     def any(self) -> bool: ...
+    def union(
+        self, other: list[HashableT] | Index, sort=...
+    ) -> Index | Int64Index | RangeIndex: ...
diff --git a/pandas-stubs/core/window/ewm.pyi b/pandas-stubs/core/window/ewm.pyi
@@ -35,7 +35,7 @@ class ExponentialMovingWindow(BaseWindow[NDFrameT], Generic[NDFrameT]):
         adjust: bool = ...,
         ignore_na: bool = ...,
         axis: Axis = ...,
-        times: str | np.ndarray | Series | None = ...,
+        times: str | np.ndarray | Series | None | np.timedelta64 = ...,
         method: CalculationMethod = ...,
     ) -> None: ...
     @overload

diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi
@@ -1,4 +1,7 @@
-from collections import abc
+from collections import (
+    abc,
+    defaultdict,
+)
 import csv
 from types import TracebackType
 from typing import (
@@ -18,6 +21,7 @@ from pandas._typing import (
     CompressionOptions,
     CSVEngine,
     CSVQuoting,
+    Dtype,
     DtypeArg,
     FilePath,
     ReadCsvBuffer,
@@ -44,7 +48,7 @@ def read_csv(
     | npt.NDArray
     | Callable[[str], bool]
     | None = ...,
-    dtype: DtypeArg | None = ...,
+    dtype: DtypeArg | defaultdict[str, Dtype] | None = ...,
     engine: CSVEngine | None = ...,
     converters: dict[int | str, Callable[[str], Any]] = ...,
     true_values: list[str] = ...,

diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -821,6 +821,19 @@ def test_types_to_feather() -> None:
             df.to_feather(file)
 
 
+def test_arrow_dtype() -> None:
+    pytest.importorskip("pyarrow")
+
+    import pyarrow as pa
+
+    check(
+        assert_type(
+            pd.ArrowDtype(pa.timestamp("s", tz="America/New_York")), pd.ArrowDtype
+        ),
+        pd.ArrowDtype,
+    )
+
+
 # compare() method added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
 def test_types_compare() -> None:
     df1 = pd.DataFrame(

diff --git a/tests/test_indexes.py b/tests/test_indexes.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
 
+from typing import Union
+
 import numpy as np
 from numpy import typing as npt
 import pandas as pd
 from pandas.core.indexes.numeric import NumericIndex
+import pytest
 from typing_extensions import assert_type
 
 from tests import check
@@ -31,6 +34,10 @@ def test_index_astype() -> None:
     mi = pd.MultiIndex.from_product([["a", "b"], ["c", "d"]], names=["ab", "cd"])
     mia = mi.astype(object)  # object is only valid parameter for MultiIndex.astype()
     check(assert_type(mia, pd.MultiIndex), pd.MultiIndex)
+    check(
+        assert_type(mi.to_frame(name=[3, 7], allow_duplicates=True), pd.DataFrame),
+        pd.DataFrame,
+    )
 
 
 def test_multiindex_get_level_values() -> None:
@@ -148,3 +155,28 @@ def test_index_relops() -> None:
     check(assert_type(ind >= 2, npt.NDArray[np.bool_]), np.ndarray, np.bool_)
     check(assert_type(ind < 2, npt.NDArray[np.bool_]), np.ndarray, np.bool_)
     check(assert_type(ind > 2, npt.NDArray[np.bool_]), np.ndarray, np.bool_)
+
+
+def test_range_index_union() -> None:
+    with pytest.warns(FutureWarning, match="pandas.Int64Index"):
+        check(
+            assert_type(
+                pd.RangeIndex(0, 10).union(pd.RangeIndex(10, 20)),
+                Union[pd.Index, pd.Int64Index, pd.RangeIndex],
+            ),
+            pd.RangeIndex,
+        )
+        check(
+            assert_type(
+                pd.RangeIndex(0, 10).union([11, 12, 13]),
+                Union[pd.Index, pd.Int64Index, pd.RangeIndex],
+            ),
+            pd.Int64Index,
+        )
+        check(
+            assert_type(
+                pd.RangeIndex(0, 10).union(["a", "b", "c"]),
+                Union[pd.Index, pd.Int64Index, pd.RangeIndex],
+            ),
+            pd.Index,
+        )
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import csv
 import io
 import os.path
@@ -208,6 +209,10 @@ def test_clipboard():
     check(assert_type(read_clipboard(), DataFrame), DataFrame)
     check(assert_type(read_clipboard(iterator=False), DataFrame), DataFrame)
     check(assert_type(read_clipboard(chunksize=None), DataFrame), DataFrame)
+    check(
+        assert_type(read_clipboard(dtype=defaultdict(lambda: "f8")), DataFrame),
+        DataFrame,
+    )
 
 
 def test_clipboard_iterator():
@@ -426,6 +431,11 @@ def test_read_csv():
         check(assert_type(read_csv(path, iterator=False), DataFrame), DataFrame)
         check(assert_type(read_csv(path, chunksize=None), DataFrame), DataFrame)
 
+        check(
+            assert_type(read_csv(path, dtype=defaultdict(lambda: "f8")), DataFrame),
+            DataFrame,
+        )
+
 
 def test_read_csv_iterator():
     with ensure_clean() as path:
@@ -489,6 +499,10 @@ def test_read_table():
         check(assert_type(read_table(path), DataFrame), DataFrame)
         check(assert_type(read_table(path, iterator=False), DataFrame), DataFrame)
         check(assert_type(read_table(path, chunksize=None), DataFrame), DataFrame)
+        check(
+            assert_type(read_table(path, dtype=defaultdict(lambda: "f8")), DataFrame),
+            DataFrame,
+        )
 
 
 def test_read_table_iterator():

diff --git a/tests/test_testing.py b/tests/test_testing.py
@@ -29,13 +29,14 @@ def test_types_assert_series_equal() -> None:
         check_datetimelike_compat=True,
     )
     if TYPE_CHECKING_INVALID_USAGE:
-        assert_series_equal(
+        assert_series_equal(  # type: ignore[call-overload]
             s1,
             s2,
             check_dtype=True,
-            check_less_precise=True,  # type: ignore[call-arg]
+            check_less_precise=True,
             check_names=True,
         )
+    assert_series_equal(s1, s2, check_like=True)
 
 
 def test_assert_frame_equal():