From f5b303e47d73c1bd09072e4f010ff4fdd189ff39 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 15:10:08 -0800
Subject: [PATCH 01/19] added f-strings and type annotations to frame.py

---
 pandas/core/frame.py | 98 +++++++++++++++++++++-----------------------
 1 file changed, 46 insertions(+), 52 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index fde3d1657b4f2..4690713ce574a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -475,7 +475,7 @@ def __init__(
             except (ValueError, TypeError) as e:
                 exc = TypeError(
                     "DataFrame constructor called with "
-                    "incompatible data and dtype: {e}".format(e=e)
+                    f"incompatible data and dtype: {e}"
                 )
                 raise exc from e
 
@@ -1112,8 +1112,7 @@ def dot(self, other):
             rvals = np.asarray(other)
             if lvals.shape[1] != rvals.shape[0]:
                 raise ValueError(
-                    "Dot product shape mismatch, "
-                    "{s} vs {r}".format(s=lvals.shape, r=rvals.shape)
+                    "Dot product shape mismatch, " f"{lvals.shape} vs {rvals.shape}"
                 )
 
         if isinstance(other, DataFrame):
@@ -1129,7 +1128,7 @@ def dot(self, other):
             else:
                 return Series(result, index=left.index)
         else:  # pragma: no cover
-            raise TypeError("unsupported type: {oth}".format(oth=type(other)))
+            raise TypeError(f"unsupported type: {type(other)}")
 
     def __matmul__(self, other):
         """
@@ -1417,7 +1416,7 @@ def to_dict(self, orient="dict", into=dict):
                 for t in self.itertuples(name=None)
             )
         else:
-            raise ValueError("orient '{o}' not understood".format(o=orient))
+            raise ValueError(f"orient '{orient}' not understood")
 
     def to_gbq(
         self,
@@ -1836,9 +1835,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
                 formats.append(dtype_mapping)
             else:
                 element = "row" if i < index_len else "column"
-                msg = ("Invalid dtype {dtype} specified for {element} {name}").format(
-                    dtype=dtype_mapping, element=element, name=name
-                )
+                msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
                 raise ValueError(msg)
 
         return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})
@@ -2307,7 +2304,7 @@ def info(
         lines.append(self.index._summary())
 
         if len(self.columns) == 0:
-            lines.append("Empty {name}".format(name=type(self).__name__))
+            lines.append(f"Empty {type(self).__name__}")
             fmt.buffer_put_lines(buf, lines)
             return
 
@@ -2335,10 +2332,7 @@ def _verbose_repr():
                 counts = self.count()
                 if len(cols) != len(counts):  # pragma: no cover
                     raise AssertionError(
-                        "Columns must equal counts "
-                        "({cols:d} != {counts:d})".format(
-                            cols=len(cols), counts=len(counts)
-                        )
+                        "Columns must equal counts " f"({len(cols)} != {len(counts)})"
                     )
                 tmpl = "{count} non-null {dtype}"
 
@@ -2382,7 +2376,7 @@ def _sizeof_fmt(num, size_qualifier):
 
         counts = self._data.get_dtype_counts()
         dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
-        lines.append("dtypes: {types}".format(types=", ".join(dtypes)))
+        lines.append(f"dtypes: {', '.join(dtypes)}")
 
         if memory_usage is None:
             memory_usage = get_option("display.memory_usage")
@@ -2399,12 +2393,7 @@ def _sizeof_fmt(num, size_qualifier):
                 if "object" in counts or self.index._is_memory_usage_qualified():
                     size_qualifier = "+"
             mem_usage = self.memory_usage(index=True, deep=deep).sum()
-            lines.append(
-                "memory usage: {mem}\n".format(
-                    mem=_sizeof_fmt(mem_usage, size_qualifier)
-                )
-            )
-
+            lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
         fmt.buffer_put_lines(buf, lines)
 
     def memory_usage(self, index=True, deep=False):
@@ -3069,8 +3058,8 @@ def query(self, expr, inplace=False, **kwargs):
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         if not isinstance(expr, str):
-            msg = "expr must be a string to be evaluated, {0} given"
-            raise ValueError(msg.format(type(expr)))
+            msg = f"expr must be a string to be evaluated, {type(expr)} given"
+            raise ValueError(msg)
         kwargs["level"] = kwargs.pop("level", 0) + 1
         kwargs["target"] = None
         res = self.eval(expr, **kwargs)
@@ -3287,11 +3276,7 @@ def select_dtypes(self, include=None, exclude=None):
 
         # can't both include AND exclude!
         if not include.isdisjoint(exclude):
-            raise ValueError(
-                "include and exclude overlap on {inc_ex}".format(
-                    inc_ex=(include & exclude)
-                )
-            )
+            raise ValueError(f"include and exclude overlap on {(include & exclude)}")
 
         # We raise when both include and exclude are empty
         # Hence, we can just shrink the columns we want to keep
@@ -4128,15 +4113,13 @@ def set_index(
                 try:
                     found = col in self.columns
                 except TypeError:
-                    raise TypeError(
-                        err_msg + " Received column of type {}".format(type(col))
-                    )
+                    raise TypeError(err_msg + f" Received column of type {type(col)}")
                 else:
                     if not found:
                         missing.append(col)
 
         if missing:
-            raise KeyError("None of {} are in the columns".format(missing))
+            raise KeyError(f"None of {missing} are in the columns")
 
         if inplace:
             frame = self
@@ -4180,17 +4163,15 @@ def set_index(
                 # check newest element against length of calling frame, since
                 # ensure_index_from_sequences would not raise for append=False.
                 raise ValueError(
-                    "Length mismatch: Expected {len_self} rows, "
-                    "received array of length {len_col}".format(
-                        len_self=len(self), len_col=len(arrays[-1])
-                    )
+                    f"Length mismatch: Expected {len(self)} rows, "
+                    f"received array of length {len(arrays[-1])}"
                 )
 
         index = ensure_index_from_sequences(arrays, names)
 
         if verify_integrity and not index.is_unique:
             duplicates = index[index.duplicated()].unique()
-            raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates))
+            raise ValueError(f"Index has duplicate keys: {duplicates}")
 
         # use set to handle duplicate column names gracefully in case of drop
         for c in set(to_remove):
@@ -4205,8 +4186,13 @@ def set_index(
             return frame
 
     def reset_index(
-        self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
-    ):
+        self,
+        level: Union[Hashable, Tuple, List, None] = None,
+        drop: bool = False,
+        inplace: bool = False,
+        col_level: Hashable = 0,
+        col_fill: Union[Hashable, None] = "",
+    ) -> pd.DataFrame:
         """
         Reset the index, or a level of it.
 
@@ -4423,7 +4409,7 @@ def _maybe_casted_values(index, labels=None):
                             raise ValueError(
                                 "col_fill=None is incompatible "
                                 "with incomplete column name "
-                                "{}".format(name)
+                                f"{name}"
                             )
                         col_fill = col_name[0]
 
@@ -4589,7 +4575,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
             mask = count > 0
         else:
             if how is not None:
-                raise ValueError("invalid how option: {h}".format(h=how))
+                raise ValueError(f"invalid how option: {how}")
             else:
                 raise TypeError("must specify how or thresh")
 
@@ -4600,7 +4586,12 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
         else:
             return result
 
-    def drop_duplicates(self, subset=None, keep="first", inplace=False):
+    def drop_duplicates(
+        self,
+        subset: Union[Sequence[Hashable], Hashable] = None,
+        keep: Union[str, bool] = "first",
+        inplace: bool = False,
+    ) -> pd.DataFrame:
         """
         Return DataFrame with duplicate rows removed.
 
@@ -4637,7 +4628,11 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
         else:
             return self[-duplicated]
 
-    def duplicated(self, subset=None, keep="first"):
+    def duplicated(
+        self,
+        subset: Union[Sequence[Hashable], Hashable] = None,
+        keep: Union[str, bool] = "first",
+    ) -> pd.Series:
         """
         Return boolean Series denoting duplicate rows.
 
@@ -7208,7 +7203,7 @@ def corr(self, method="pearson", min_periods=1):
             raise ValueError(
                 "method must be either 'pearson', "
                 "'spearman', 'kendall', or a callable, "
-                "'{method}' was supplied".format(method=method)
+                f"'{method}' was supplied"
             )
 
         return self._constructor(correl, index=idx, columns=cols)
@@ -7399,9 +7394,9 @@ def c(x):
 
         else:
             raise ValueError(
-                "Invalid method {method} was passed, "
+                f"Invalid method {method} was passed, "
                 "valid methods are: 'pearson', 'kendall', "
-                "'spearman', or callable".format(method=method)
+                "'spearman', or callable"
             )
 
         if not drop:
@@ -7531,8 +7526,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
 
         if not isinstance(count_axis, ABCMultiIndex):
             raise TypeError(
-                "Can only count levels on hierarchical "
-                "{ax}.".format(ax=self._get_axis_name(axis))
+                "Can only count levels on hierarchical " f"{self._get_axis_name(axis)}."
             )
 
         if frame._is_mixed_type:
@@ -7590,8 +7584,8 @@ def _get_data(axis_matters):
                     data = self._get_bool_data()
             else:  # pragma: no cover
                 msg = (
-                    "Generating numeric_only data with filter_type {f}"
-                    "not supported.".format(f=filter_type)
+                    f"Generating numeric_only data with filter_type {filter_type}"
+                    "not supported."
                 )
                 raise NotImplementedError(msg)
             return data
@@ -8000,7 +7994,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True):
         elif axis == 1:
             new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
         else:  # pragma: no cover
-            raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
+            raise AssertionError(f"Axis must be 0 or 1. Got {axis}")
 
         return self._constructor(new_data)
 
@@ -8034,7 +8028,7 @@ def to_period(self, freq=None, axis=0, copy=True):
         elif axis == 1:
             new_data.set_axis(0, self.columns.to_period(freq=freq))
         else:  # pragma: no cover
-            raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
+            raise AssertionError(f"Axis must be 0 or 1. Got {axis}")
 
         return self._constructor(new_data)
 
@@ -8170,4 +8164,4 @@ def _from_nested_dict(data):
 
 
 def _put_str(s, space):
-    return "{s}".format(s=s)[:space].ljust(space)
+    return f"{s}"[:space].ljust(space)

From c3fd3082ebaffb1f218943a2d69707bfcb238222 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 16:48:07 -0800
Subject: [PATCH 02/19] minor fix

---
 pandas/core/frame.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4690713ce574a..31e7963ffb12d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4192,7 +4192,7 @@ def reset_index(
         inplace: bool = False,
         col_level: Hashable = 0,
         col_fill: Union[Hashable, None] = "",
-    ) -> pd.DataFrame:
+    ) -> DataFrame:
         """
         Reset the index, or a level of it.
 
@@ -4591,7 +4591,7 @@ def drop_duplicates(
         subset: Union[Sequence[Hashable], Hashable] = None,
         keep: Union[str, bool] = "first",
         inplace: bool = False,
-    ) -> pd.DataFrame:
+    ) -> DataFrame:
         """
         Return DataFrame with duplicate rows removed.
 
@@ -8117,8 +8117,8 @@ def isin(self, values):
         else:
             if not is_list_like(values):
                 raise TypeError(
-                    f"only list-like or dict-like objects are allowed "
-                    f"to be passed to DataFrame.isin(), "
+                    "only list-like or dict-like objects are allowed "
+                    "to be passed to DataFrame.isin(), "
                     f"you passed a {repr(type(values).__name__)}"
                 )
             return DataFrame(

From 3a4c2448982ee26b829479f77c1a161150ff0b70 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 19:37:51 -0800
Subject: [PATCH 03/19] cleaned up f-strings and flake8 errors per PR
 comments

---
 pandas/core/frame.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 31e7963ffb12d..23e1006ce3802 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2332,7 +2332,7 @@ def _verbose_repr():
                 counts = self.count()
                 if len(cols) != len(counts):  # pragma: no cover
                     raise AssertionError(
-                        "Columns must equal counts " f"({len(cols)} != {len(counts)})"
+                        f"Columns must equal counts ({len(cols)} != {len(counts)})"
                     )
                 tmpl = "{count} non-null {dtype}"
 
@@ -4113,7 +4113,11 @@ def set_index(
                 try:
                     found = col in self.columns
                 except TypeError:
-                    raise TypeError(err_msg + f" Received column of type {type(col)}")
+                    raise TypeError(
+                        'The parameter "keys" may be a column key, one-dimensional '
+                        "array, or a list containing only valid column keys and "
+                        f"one-dimensional arrays. Received column of type {type(col)}"
+                    )
                 else:
                     if not found:
                         missing.append(col)
@@ -4192,7 +4196,7 @@ def reset_index(
         inplace: bool = False,
         col_level: Hashable = 0,
         col_fill: Union[Hashable, None] = "",
-    ) -> DataFrame:
+    ) -> pandas.DataFrame:
         """
         Reset the index, or a level of it.
 
@@ -4591,7 +4595,7 @@ def drop_duplicates(
         subset: Union[Sequence[Hashable], Hashable] = None,
         keep: Union[str, bool] = "first",
         inplace: bool = False,
-    ) -> DataFrame:
+    ) -> pandas.DataFrame:
         """
         Return DataFrame with duplicate rows removed.
 
@@ -4632,7 +4636,7 @@ def duplicated(
         self,
         subset: Union[Sequence[Hashable], Hashable] = None,
         keep: Union[str, bool] = "first",
-    ) -> pd.Series:
+    ) -> pandas.Series:
         """
         Return boolean Series denoting duplicate rows.
 

From ef87c64db68700d32fcb8eb13814b1e57b4f2d15 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 20:04:31 -0800
Subject: [PATCH 04/19] fixed return annotation of functions that return a
 DataFrame

---
 pandas/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 23e1006ce3802..75016cb9a2dcd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4196,7 +4196,7 @@ def reset_index(
         inplace: bool = False,
         col_level: Hashable = 0,
         col_fill: Union[Hashable, None] = "",
-    ) -> pandas.DataFrame:
+    ) -> "DataFrame":
         """
         Reset the index, or a level of it.
 
@@ -4595,7 +4595,7 @@ def drop_duplicates(
         subset: Union[Sequence[Hashable], Hashable] = None,
         keep: Union[str, bool] = "first",
         inplace: bool = False,
-    ) -> pandas.DataFrame:
+    ) -> "DataFrame":
         """
         Return DataFrame with duplicate rows removed.
 

From fde23a9222b8283bc23a673e504c21a9dc8c32ff Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 20:22:43 -0800
Subject: [PATCH 05/19] fixed annotation of functions that return a Series

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 75016cb9a2dcd..6979a1db95317 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4636,7 +4636,7 @@ def duplicated(
         self,
         subset: Union[Sequence[Hashable], Hashable] = None,
         keep: Union[str, bool] = "first",
-    ) -> pandas.Series:
+    ) -> "Series":
         """
         Return boolean Series denoting duplicate rows.
 

From cf3399858a178b0e6b3d049db1f54d23f2d478f4 Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Tue, 3 Dec 2019 20:46:12 -0800
Subject: [PATCH 06/19] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6979a1db95317..d16b96203d154 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4195,7 +4195,7 @@ def reset_index(
         drop: bool = False,
         inplace: bool = False,
         col_level: Hashable = 0,
-        col_fill: Union[Hashable, None] = "",
+        col_fill: Optional[Hashable] = "",
     ) -> "DataFrame":
         """
         Reset the index, or a level of it.

From d223c888717c215d033bef23cf3b4f63f6f3f490 Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Tue, 3 Dec 2019 20:46:20 -0800
Subject: [PATCH 07/19] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d16b96203d154..e24cc79b7c16c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4191,7 +4191,7 @@ def set_index(
 
     def reset_index(
         self,
-        level: Union[Hashable, Tuple, List, None] = None,
+        level: Optional[Union[Hashable, Sequence[Hashable]] = None,
         drop: bool = False,
         inplace: bool = False,
         col_level: Hashable = 0,

From 188410c265753d4270bb2c0fa3acc60cb087e966 Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Tue, 3 Dec 2019 20:46:29 -0800
Subject: [PATCH 08/19] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e24cc79b7c16c..a1358d76283d2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4114,7 +4114,7 @@ def set_index(
                     found = col in self.columns
                 except TypeError:
                     raise TypeError(
-                        'The parameter "keys" may be a column key, one-dimensional '
+                       f"{err_msg} Received column of type {type(col)}"
                         "array, or a list containing only valid column keys and "
                         f"one-dimensional arrays. Received column of type {type(col)}"
                     )

From bfdf696691bd2326ded0c59f9c78220de2ccef14 Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Tue, 3 Dec 2019 20:46:59 -0800
Subject: [PATCH 09/19] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a1358d76283d2..564231a247606 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4592,7 +4592,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
 
     def drop_duplicates(
         self,
-        subset: Union[Sequence[Hashable], Hashable] = None,
+        subset: Optional[Union[Hashable, Sequence[Hashable]] = None,
         keep: Union[str, bool] = "first",
         inplace: bool = False,
     ) -> "DataFrame":

From 0ecb0008f727205daf4a12b014da8e1e1c740a98 Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Tue, 3 Dec 2019 20:47:07 -0800
Subject: [PATCH 10/19] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 564231a247606..4bbb2880a2676 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4634,7 +4634,7 @@ def drop_duplicates(
 
     def duplicated(
         self,
-        subset: Union[Sequence[Hashable], Hashable] = None,
+        subset: Optional[Union[Hashable, Sequence[Hashable]] = None,
         keep: Union[str, bool] = "first",
     ) -> "Series":
         """

From 7b52345db3005033a7ae1db062f5521d38b17e4b Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 21:04:58 -0800
Subject: [PATCH 11/19] typing syntax fix

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4bbb2880a2676..bd39f936a9e35 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4191,7 +4191,7 @@ def set_index(
 
     def reset_index(
         self,
-        level: Optional[Union[Hashable, Sequence[Hashable]] = None,
+        level: Optional[Union[Hashable, Sequence[Hashable]]] = None,
         drop: bool = False,
         inplace: bool = False,
         col_level: Hashable = 0,

From 5e7d91557bb980781708e03ce093757d56fd3035 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Tue, 3 Dec 2019 21:06:41 -0800
Subject: [PATCH 12/19] more typing syntax fixes

---
 pandas/core/frame.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bd39f936a9e35..910eb330e663b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4114,7 +4114,7 @@ def set_index(
                     found = col in self.columns
                 except TypeError:
                     raise TypeError(
-                       f"{err_msg} Received column of type {type(col)}"
+                        f"{err_msg} Received column of type {type(col)}"
                         "array, or a list containing only valid column keys and "
                         f"one-dimensional arrays. Received column of type {type(col)}"
                     )
@@ -4592,7 +4592,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
 
     def drop_duplicates(
         self,
-        subset: Optional[Union[Hashable, Sequence[Hashable]] = None,
+        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
         keep: Union[str, bool] = "first",
         inplace: bool = False,
     ) -> "DataFrame":
@@ -4634,7 +4634,7 @@ def drop_duplicates(
 
     def duplicated(
         self,
-        subset: Optional[Union[Hashable, Sequence[Hashable]] = None,
+        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
         keep: Union[str, bool] = "first",
     ) -> "Series":
         """

From 70ef86095ffe1e210ce6eb5896229940b119d872 Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Wed, 4 Dec 2019 09:23:08 -0800
Subject: [PATCH 13/19] Update pandas/core/frame.py

Co-Authored-By: Simon Hawkins <simonjayhawkins@gmail.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 910eb330e663b..368844b3be5fe 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1112,7 +1112,7 @@ def dot(self, other):
             rvals = np.asarray(other)
             if lvals.shape[1] != rvals.shape[0]:
                 raise ValueError(
-                    "Dot product shape mismatch, " f"{lvals.shape} vs {rvals.shape}"
+                    f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
                 )
 
         if isinstance(other, DataFrame):

From 997a2e33ed695493bc3d5f32537f245a73cc423c Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Wed, 4 Dec 2019 09:27:34 -0800
Subject: [PATCH 14/19] fixed f-string with err_msg

---
 pandas/core/frame.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 910eb330e663b..3e6802ee2c72b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4114,9 +4114,7 @@ def set_index(
                     found = col in self.columns
                 except TypeError:
                     raise TypeError(
-                        f"{err_msg} Received column of type {type(col)}"
-                        "array, or a list containing only valid column keys and "
-                        f"one-dimensional arrays. Received column of type {type(col)}"
+                        f"{err_msg}. Received column of type {type(col)}"
                     )
                 else:
                     if not found:

From a00c34dc57977a1d28a6ff763f5980463758c34a Mon Sep 17 00:00:00 2001
From: mck619 <mckakehashi@gmail.com>
Date: Wed, 4 Dec 2019 09:32:26 -0800
Subject: [PATCH 15/19] Update pandas/core/frame.py

Co-Authored-By: Simon Hawkins <simonjayhawkins@gmail.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 368844b3be5fe..6c3eff149f936 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7530,7 +7530,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
 
         if not isinstance(count_axis, ABCMultiIndex):
             raise TypeError(
-                "Can only count levels on hierarchical " f"{self._get_axis_name(axis)}."
+                f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
             )
 
         if frame._is_mixed_type:

From 099feb61e867b53568a72ccbbc8e8b5bb64a826d Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Wed, 4 Dec 2019 09:36:16 -0800
Subject: [PATCH 16/19] f-string cleanup

---
 pandas/core/frame.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 298192661e3a9..47dc6483c1b6f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4410,8 +4410,7 @@ def _maybe_casted_values(index, labels=None):
                         if len(col_name) not in (1, self.columns.nlevels):
                             raise ValueError(
                                 "col_fill=None is incompatible "
-                                "with incomplete column name "
-                                f"{name}"
+                                f"with incomplete column name {name}"
                             )
                         col_fill = col_name[0]
 
@@ -7586,7 +7585,7 @@ def _get_data(axis_matters):
                     data = self._get_bool_data()
             else:  # pragma: no cover
                 msg = (
-                    f"Generating numeric_only data with filter_type {filter_type}"
+                    f"Generating numeric_only data with filter_type {filter_type} "
                     "not supported."
                 )
                 raise NotImplementedError(msg)
@@ -8166,4 +8165,4 @@ def _from_nested_dict(data):
 
 
 def _put_str(s, space):
-    return f"{s}"[:space].ljust(space)
+    return str(s)[:space].ljust(space)

From 18fed327b296c3f3a2bab41fc6283a419aea6359 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Wed, 4 Dec 2019 09:52:09 -0800
Subject: [PATCH 17/19] black formatting

---
 pandas/core/frame.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5457febe8b6f5..6afd64f64df24 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4113,9 +4113,7 @@ def set_index(
                 try:
                     found = col in self.columns
                 except TypeError:
-                    raise TypeError(
-                        f"{err_msg}. Received column of type {type(col)}"
-                    )
+                    raise TypeError(f"{err_msg}. Received column of type {type(col)}")
                 else:
                     if not found:
                         missing.append(col)

From 17444ec7e8256d7d8e11278a43e78465dc0beb23 Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Thu, 5 Dec 2019 15:42:09 -0800
Subject: [PATCH 18/19] mypy fixes per Simon's comments

---
 pandas/core/frame.py         | 21 +++++++++++++++++----
 pandas/core/reshape/merge.py |  5 ++++-
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6afd64f64df24..871e2066ae6c1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -15,6 +15,7 @@
 import sys
 from textwrap import dedent
 from typing import (
+    Any,
     FrozenSet,
     Hashable,
     Iterable,
@@ -25,6 +26,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )
 import warnings
 
@@ -4192,7 +4194,7 @@ def reset_index(
         inplace: bool = False,
         col_level: Hashable = 0,
         col_fill: Optional[Hashable] = "",
-    ) -> "DataFrame":
+    ) -> Optional["DataFrame"]:
         """
         Reset the index, or a level of it.
 
@@ -4220,8 +4222,8 @@ def reset_index(
 
         Returns
         -------
-        DataFrame
-            DataFrame with the new index.
+        DataFrame or None
+            DataFrame with the new index or None if ``inplace=True``.
 
         See Also
         --------
@@ -4386,6 +4388,7 @@ def _maybe_casted_values(index, labels=None):
                 new_index = self.index.droplevel(level)
 
         if not drop:
+            to_insert: Iterable[Tuple[Any, Optional[Any]]]
             if isinstance(self.index, ABCMultiIndex):
                 names = [
                     (n if n is not None else f"level_{i}")
@@ -4425,6 +4428,8 @@ def _maybe_casted_values(index, labels=None):
         if not inplace:
             return new_obj
 
+        return None
+
     # ----------------------------------------------------------------------
     # Reindex-based selection methods
 
@@ -4590,7 +4595,7 @@ def drop_duplicates(
         subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
         keep: Union[str, bool] = "first",
         inplace: bool = False,
-    ) -> "DataFrame":
+    ) -> Optional["DataFrame"]:
         """
         Return DataFrame with duplicate rows removed.
 
@@ -4613,6 +4618,7 @@ def drop_duplicates(
         Returns
         -------
         DataFrame
+            DataFrame with duplicates removed or None if ``inplace=True``
         """
         if self.empty:
             return self.copy()
@@ -4627,6 +4633,8 @@ def drop_duplicates(
         else:
             return self[-duplicated]
 
+        return None
+
     def duplicated(
         self,
         subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
@@ -4675,6 +4683,9 @@ def f(vals):
         ):
             subset = (subset,)
 
+        #  needed for mypy since can't narrow types using np.iterable
+        subset = cast(Iterable, subset)
+
         # Verify all columns in subset exist in the queried dataframe
         # Otherwise, raise a KeyError, same as if you try to __getitem__ with a
         # key that doesn't exist.
@@ -6024,6 +6035,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
             raise ValueError("columns must be unique")
 
         df = self.reset_index(drop=True)
+        # TODO: use overload to refine return type of reset_index
+        assert df is not None  # needed for mypy
         result = df[column].explode()
         result = df.drop([column], axis=1).join(result)
         result.index = self.index.take(result.index)
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index d671fff568891..726a59ca8e008 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -126,7 +126,10 @@ def _groupby_and_merge(
                 on = [on]
 
             if right.duplicated(by + on).any():
-                right = right.drop_duplicates(by + on, keep="last")
+                _right = right.drop_duplicates(by + on, keep="last")
+                # TODO: use overload to refine return type of drop_duplicates
+                assert _right is not None  # needed for mypy
+                right = _right
         rby = right.groupby(by, sort=False)
     except KeyError:
         rby = None

From 1a9c6f0504dbaea65427214a89000e9febf0313f Mon Sep 17 00:00:00 2001
From: Michael Kakehashi <mkakehashi@icsecurity.com>
Date: Thu, 5 Dec 2019 16:04:50 -0800
Subject: [PATCH 19/19] docstring fix

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 871e2066ae6c1..88967b13c89b5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4618,7 +4618,7 @@ def drop_duplicates(
         Returns
         -------
         DataFrame
-            DataFrame with duplicates removed or None if ``inplace=True``
+            DataFrame with duplicates removed or None if ``inplace=True``.
         """
         if self.empty:
             return self.copy()