From 12f9adf93813b577163b229e011d9c04fe5e332f Mon Sep 17 00:00:00 2001
From: Patrick Schleiter <4884221+pschleiter@users.noreply.github.com>
Date: Tue, 18 Apr 2023 14:33:02 +0000
Subject: [PATCH 1/8] Specify method in reindex for class dataframe

Specify parser in to_xml of class dataframe

Update doc string to_orc in class dataframe

Specify engine in to_parquet in class dataframe
---
 pandas/_typing.py    |  3 +++
 pandas/core/frame.py | 14 ++++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index de02a549856ab..916e9ace59543 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -376,6 +376,9 @@ def closed(self) -> bool:
 # join
 JoinHow = Literal["left", "right", "inner", "outer"]
 
+# reindex
+ReindexMethod = Union[FillnaOptions ,Literal["nearest"]]
+
 MatplotlibColor = Union[str, Sequence[float]]
 TimeGrouperOrigin = Union[
     "Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5341b87c39676..44054ec2e8d3f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -237,6 +237,8 @@
         ValueKeyFunc,
         WriteBuffer,
         npt,
+        ReindexMethod,
+        XMLParsers,
     )
 
     from pandas.core.groupby.generic import DataFrameGroupBy
@@ -2763,7 +2765,7 @@ def to_markdown(
     def to_parquet(
         self,
         path: None = ...,
-        engine: str = ...,
+        engine: Literal["auto", "pyarrow", "fastparquet"] = ...,
         compression: str | None = ...,
         index: bool | None = ...,
         partition_cols: list[str] | None = ...,
@@ -2776,7 +2778,7 @@ def to_parquet(
     def to_parquet(
         self,
         path: FilePath | WriteBuffer[bytes],
-        engine: str = ...,
+        engine: Literal["auto", "pyarrow", "fastparquet"] = ...,
         compression: str | None = ...,
         index: bool | None = ...,
         partition_cols: list[str] | None = ...,
@@ -2789,7 +2791,7 @@ def to_parquet(
     def to_parquet(
         self,
         path: FilePath | WriteBuffer[bytes] | None = None,
-        engine: str = "auto",
+        engine: Literal["auto", "pyarrow", "fastparquet"] = "auto",
         compression: str | None = "snappy",
         index: bool | None = None,
         partition_cols: list[str] | None = None,
@@ -2919,7 +2921,7 @@ def to_orc(
             we refer to objects with a write() method, such as a file handle
             (e.g. via builtin open function). If path is None,
             a bytes object is returned.
-        engine : str, default 'pyarrow'
+        engine : {'pyarrow'}, default 'pyarrow'
             ORC library to use. Pyarrow must be >= 7.0.0.
         index : bool, optional
             If ``True``, include the dataframe's index(es) in the file output.
@@ -3155,7 +3157,7 @@ def to_xml(
         encoding: str = "utf-8",
         xml_declaration: bool | None = True,
         pretty_print: bool | None = True,
-        parser: str | None = "lxml",
+        parser: XMLParsers | None = "lxml",
         stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
@@ -4988,7 +4990,7 @@ def reindex(
         index=None,
         columns=None,
         axis: Axis | None = None,
-        method: str | None = None,
+        method: ReindexMethod | None = None,
         copy: bool | None = None,
         level: Level | None = None,
         fill_value: Scalar | None = np.nan,

From fc65e3a40ce34ae6405f48012e55d2f9d08574e7 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Tue, 18 Apr 2023 16:37:14 +0200
Subject: [PATCH 2/8] undo changes and adding None as an optional argument type
 for validate argument of join and merge method

Change byteorder argument typing for to_stata method to literal, added definition in pandas/_typing.py

Change if_exists argument typing for to_gbq method to literal, added definition in pandas/_typing.py

Change orient argument typing for from_dict method to literal, added definition in pandas/_typing.py

Change how argument typing for to_timestamp method to literal, added definition in pandas/_typing.py

Change validate argument typing for merge and join methods to literal, added definition in pandas/_typing.py

Change na_action arguments typing for applymap method to literal, added definition in pandas/_typing.py

Change join and errors arguments typing for update method to literal, added definition in pandas/_typing.py

Change keep argument typing for nlargest and nsmallest to literal, added definition in pandas/_typing.py

Specify the kind and na_position more precisely in sort_values, reusing type definitions in pandas/_typing.py
---
 pandas/_typing.py    | 40 ++++++++++++++++++++++++++++++++++++++++
 pandas/core/frame.py | 39 ++++++++++++++++++++++++++-------------
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index de02a549856ab..0dfe9d2b7d0b5 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -132,6 +132,8 @@
 ]
 Timezone = Union[str, tzinfo]
 
+ToTimestampHow = Literal["s", "e", "start", "end"]
+
 # NDFrameT is stricter and ensures that the same subclass of NDFrame always is
 # used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
 # Series is passed into a function, a Series is always returned and if a DataFrame is
@@ -303,6 +305,9 @@ def closed(self) -> bool:
     str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
 ]
 
+# Arguments for nsmallest and nlargest
+NsmallestNlargestKeep = Literal["first", "last", "all"]
+
 # Arguments for fillna()
 FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
 
@@ -372,9 +377,29 @@ def closed(self) -> bool:
 
 # merge
 MergeHow = Literal["left", "right", "inner", "outer", "cross"]
+MergeValidate = Literal[
+    "one_to_one",
+    "1:1",
+    "one_to_many",
+    "1:m",
+    "many_to_one",
+    "m:1",
+    "many_to_many",
+    "m:m",
+]
 
 # join
 JoinHow = Literal["left", "right", "inner", "outer"]
+JoinValidate = Literal[
+    "one_to_one",
+    "1:1",
+    "one_to_many",
+    "1:m",
+    "many_to_one",
+    "m:1",
+    "many_to_many",
+    "m:m",
+]
 
 MatplotlibColor = Union[str, Sequence[float]]
 TimeGrouperOrigin = Union[
@@ -390,3 +415,18 @@ def closed(self) -> bool:
 ]
 AlignJoin = Literal["outer", "inner", "left", "right"]
 DtypeBackend = Literal["pyarrow", "numpy_nullable"]
+
+# update
+UpdateJoin = Literal["left"]
+
+# applymap
+NaAction = Literal["None", "ignore"]
+
+# from_dict[
+FromDictOrient = Literal["columns", "index", "tight"]
+
+# to_gbc
+ToGbqIfexist = Literal["fail", "replace", "append"]
+
+# to_stata
+ToStataByteorder = Literal[">", "<", "little", "big"]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5341b87c39676..085e2c6eb2182 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -219,12 +219,17 @@
         FloatFormatType,
         FormattersType,
         Frequency,
+        FromDictOrient,
         IgnoreRaise,
         IndexKeyFunc,
         IndexLabel,
+        JoinValidate,
         Level,
         MergeHow,
+        MergeValidate,
+        NaAction,
         NaPosition,
+        NsmallestNlargestKeep,
         PythonFuncType,
         QuantileInterpolation,
         ReadBuffer,
@@ -234,6 +239,10 @@
         SortKind,
         StorageOptions,
         Suffixes,
+        ToGbqIfexist,
+        ToStataByteorder,
+        ToTimestampHow,
+        UpdateJoin,
         ValueKeyFunc,
         WriteBuffer,
         npt,
@@ -1637,7 +1646,7 @@ def __rmatmul__(self, other) -> DataFrame:
     def from_dict(
         cls,
         data: dict,
-        orient: str = "columns",
+        orient: FromDictOrient = "columns",
         dtype: Dtype | None = None,
         columns: Axes | None = None,
     ) -> DataFrame:
@@ -1981,7 +1990,7 @@ def to_gbq(
         project_id: str | None = None,
         chunksize: int | None = None,
         reauth: bool = False,
-        if_exists: str = "fail",
+        if_exists: ToGbqIfexist = "fail",
         auth_local_webserver: bool = True,
         table_schema: list[dict[str, str]] | None = None,
         location: str | None = None,
@@ -2535,7 +2544,7 @@ def to_stata(
         *,
         convert_dates: dict[Hashable, str] | None = None,
         write_index: bool = True,
-        byteorder: str | None = None,
+        byteorder: ToStataByteorder | None = None,
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
@@ -6521,8 +6530,8 @@ def sort_values(
         axis: Axis = ...,
         ascending=...,
         inplace: Literal[False] = ...,
-        kind: str = ...,
-        na_position: str = ...,
+        kind: SortKind = ...,
+        na_position: NaPosition = ...,
         ignore_index: bool = ...,
         key: ValueKeyFunc = ...,
     ) -> DataFrame:
@@ -7077,7 +7086,9 @@ def value_counts(
 
         return counts
 
-    def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame:
+    def nlargest(
+        self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first"
+    ) -> DataFrame:
         """
         Return the first `n` rows ordered by `columns` in descending order.
 
@@ -7184,7 +7195,9 @@ def nlargest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFram
         """
         return selectn.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest()
 
-    def nsmallest(self, n: int, columns: IndexLabel, keep: str = "first") -> DataFrame:
+    def nsmallest(
+        self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first"
+    ) -> DataFrame:
         """
         Return the first `n` rows ordered by `columns` in ascending order.
 
@@ -8348,10 +8361,10 @@ def combiner(x, y):
     def update(
         self,
         other,
-        join: str = "left",
+        join: UpdateJoin = "left",
         overwrite: bool = True,
         filter_func=None,
-        errors: str = "ignore",
+        errors: IgnoreRaise = "ignore",
     ) -> None:
         """
         Modify in place using non-NA values from another DataFrame.
@@ -9857,7 +9870,7 @@ def infer(x):
         return self.apply(infer).__finalize__(self, "map")
 
     def applymap(
-        self, func: PythonFuncType, na_action: str | None = None, **kwargs
+        self, func: PythonFuncType, na_action: NaAction = None, **kwargs
     ) -> DataFrame:
         """
         Apply a function to a Dataframe elementwise.
@@ -9969,7 +9982,7 @@ def join(
         lsuffix: str = "",
         rsuffix: str = "",
         sort: bool = False,
-        validate: str | None = None,
+        validate: JoinValidate | None = None,
     ) -> DataFrame:
         """
         Join columns of another DataFrame.
@@ -10211,7 +10224,7 @@ def merge(
         suffixes: Suffixes = ("_x", "_y"),
         copy: bool | None = None,
         indicator: str | bool = False,
-        validate: str | None = None,
+        validate: MergeValidate | None = None,
     ) -> DataFrame:
         from pandas.core.reshape.merge import merge
 
@@ -11506,7 +11519,7 @@ def quantile(
     def to_timestamp(
         self,
         freq: Frequency | None = None,
-        how: str = "start",
+        how: ToTimestampHow = "start",
         axis: Axis = 0,
         copy: bool | None = None,
     ) -> DataFrame:

From 7e1c7c9e7374b157eecd1d3e24698590d09ce565 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Tue, 18 Apr 2023 17:34:08 +0200
Subject: [PATCH 3/8] removing none from literal and adding it to the argument
 of applymap

---
 pandas/_typing.py    | 4 ++--
 pandas/core/frame.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index c7f023b11ad41..41247a90678dd 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -402,7 +402,7 @@ def closed(self) -> bool:
 ]
 
 # reindex
-ReindexMethod = Union[FillnaOptions ,Literal["nearest"]]
+ReindexMethod = Union[FillnaOptions, Literal["nearest"]]
 
 MatplotlibColor = Union[str, Sequence[float]]
 TimeGrouperOrigin = Union[
@@ -423,7 +423,7 @@ def closed(self) -> bool:
 UpdateJoin = Literal["left"]
 
 # applymap
-NaAction = Literal["None", "ignore"]
+NaAction = Literal["ignore"]
 
 # from_dict[
 FromDictOrient = Literal["columns", "index", "tight"]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2f947ad780ad0..8c8125f5cf788 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -233,6 +233,7 @@
         PythonFuncType,
         QuantileInterpolation,
         ReadBuffer,
+        ReindexMethod,
         Renamer,
         Scalar,
         Self,
@@ -245,9 +246,8 @@
         UpdateJoin,
         ValueKeyFunc,
         WriteBuffer,
-        npt,
-        ReindexMethod,
         XMLParsers,
+        npt,
     )
 
     from pandas.core.groupby.generic import DataFrameGroupBy
@@ -9872,7 +9872,7 @@ def infer(x):
         return self.apply(infer).__finalize__(self, "map")
 
     def applymap(
-        self, func: PythonFuncType, na_action: NaAction = None, **kwargs
+        self, func: PythonFuncType, na_action: NaAction | None = None, **kwargs
     ) -> DataFrame:
         """
         Apply a function to a Dataframe elementwise.

From 748c47f31a581cab79e8a1be3a1253bacff61274 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Tue, 18 Apr 2023 18:33:17 +0200
Subject: [PATCH 4/8] adding reindex literal to super class NDFrame as it
 violates the Liskov substitution principle otherwise

---
 pandas/core/generic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 582a043a8a78a..839093a215cba 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -69,6 +69,7 @@
     NaPosition,
     NDFrameT,
     RandomState,
+    ReindexMethod,
     Renamer,
     Scalar,
     Self,
@@ -5153,7 +5154,7 @@ def reindex(
         index=None,
         columns=None,
         axis: Axis | None = None,
-        method: str | None = None,
+        method: ReindexMethod | None = None,
         copy: bool_t | None = None,
         level: Level | None = None,
         fill_value: Scalar | None = np.nan,

From 7ff3b1905aaeb09823e5ca244a29b5d5054af9a2 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Tue, 18 Apr 2023 20:17:05 +0200
Subject: [PATCH 5/8] adding reindex literal to Series.reindex so that it
 matches the method type used in generic.py and frame.py

---
 pandas/core/series.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index adb16c2f2dd55..4a82eafadb09e 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -167,6 +167,7 @@
         NumpySorter,
         NumpyValueArrayLike,
         QuantileInterpolation,
+        ReindexMethod,
         Renamer,
         Scalar,
         Self,
@@ -4725,7 +4726,7 @@ def reindex(  # type: ignore[override]
         index=None,
         *,
         axis: Axis | None = None,
-        method: str | None = None,
+        method: ReindexMethod | None = None,
         copy: bool | None = None,
         level: Level | None = None,
         fill_value: Scalar | None = None,

From fd99bc433e61c4fdd8a8b92d7c0c7d896fb35001 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Tue, 18 Apr 2023 21:32:16 +0200
Subject: [PATCH 6/8] adding literal to missing.py

---
 pandas/core/missing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index aaed431f890d3..585ad50ad9069 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -25,6 +25,7 @@
     Axis,
     AxisInt,
     F,
+    ReindexMethod,
     npt,
 )
 from pandas.compat._optional import import_optional_dependency
@@ -949,7 +950,7 @@ def get_fill_func(method, ndim: int = 1):
     return {"pad": _pad_2d, "backfill": _backfill_2d}[method]
 
 
-def clean_reindex_fill_method(method) -> str | None:
+def clean_reindex_fill_method(method) -> ReindexMethod | None:
     return clean_fill_method(method, allow_nearest=True)
 
 

From 2d5667d5eaa834188e5e2df7b8dbba59a462445c Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Wed, 19 Apr 2023 07:42:59 +0200
Subject: [PATCH 7/8] ignore type for orient in from_dict method of frame due
 to mypy error

---
 pandas/_typing.py    | 2 +-
 pandas/core/frame.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 41247a90678dd..62fe31fec26b7 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -425,7 +425,7 @@ def closed(self) -> bool:
 # applymap
 NaAction = Literal["ignore"]
 
-# from_dict[
+# from_dict
 FromDictOrient = Literal["columns", "index", "tight"]
 
 # to_gbc
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8c8125f5cf788..bd298b8d723b8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1735,7 +1735,7 @@ def from_dict(
            c   2  4
         """
         index = None
-        orient = orient.lower()
+        orient = orient.lower()  # type: ignore[assignment]
         if orient == "index":
             if len(data) > 0:
                 # TODO speed up Series case

From c007b53b63747255ab8064db04187a78d2cc9030 Mon Sep 17 00:00:00 2001
From: benedikt-mangold <48798074+benedikt-mangold@users.noreply.github.com>
Date: Thu, 20 Apr 2023 10:16:18 +0200
Subject: [PATCH 8/8] pulling main and resolving merge conflict

---
 pandas/_typing.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 71b8e0777b6dc..e162f7f1662ee 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -363,6 +363,9 @@ def closed(self) -> bool:
 SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
 NaPosition = Literal["first", "last"]
 
+# Arguments for nsmallest and nlargest
+NsmallestNlargestKeep = Literal["first", "last", "all"]
+
 # quantile interpolation
 QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"]