From f5b303e47d73c1bd09072e4f010ff4fdd189ff39 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 15:10:08 -0800 Subject: [PATCH 01/19] added f strings and typing to frame.py --- pandas/core/frame.py | 98 +++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fde3d1657b4f2..4690713ce574a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -475,7 +475,7 @@ def __init__( except (ValueError, TypeError) as e: exc = TypeError( "DataFrame constructor called with " - "incompatible data and dtype: {e}".format(e=e) + f"incompatible data and dtype: {e}" ) raise exc from e @@ -1112,8 +1112,7 @@ def dot(self, other): rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: raise ValueError( - "Dot product shape mismatch, " - "{s} vs {r}".format(s=lvals.shape, r=rvals.shape) + "Dot product shape mismatch, " f"{lvals.shape} vs {rvals.shape}" ) if isinstance(other, DataFrame): @@ -1129,7 +1128,7 @@ def dot(self, other): else: return Series(result, index=left.index) else: # pragma: no cover - raise TypeError("unsupported type: {oth}".format(oth=type(other))) + raise TypeError(f"unsupported type: {type(other)}") def __matmul__(self, other): """ @@ -1417,7 +1416,7 @@ def to_dict(self, orient="dict", into=dict): for t in self.itertuples(name=None) ) else: - raise ValueError("orient '{o}' not understood".format(o=orient)) + raise ValueError(f"orient '{orient}' not understood") def to_gbq( self, @@ -1836,9 +1835,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None): formats.append(dtype_mapping) else: element = "row" if i < index_len else "column" - msg = ("Invalid dtype {dtype} specified for {element} {name}").format( - dtype=dtype_mapping, element=element, name=name - ) + msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}" raise ValueError(msg) return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats}) @@ -2307,7 +2304,7 @@ def info( lines.append(self.index._summary()) if len(self.columns) == 0: - lines.append("Empty {name}".format(name=type(self).__name__)) + lines.append(f"Empty {type(self).__name__}") fmt.buffer_put_lines(buf, lines) return @@ -2335,10 +2332,7 @@ def _verbose_repr(): counts = self.count() if len(cols) != len(counts): # pragma: no cover raise AssertionError( - "Columns must equal counts " - "({cols:d} != {counts:d})".format( - cols=len(cols), counts=len(counts) - ) + "Columns must equal counts " f"({len(cols)} != {len(counts)})" ) tmpl = "{count} non-null {dtype}" @@ -2382,7 +2376,7 @@ def _sizeof_fmt(num, size_qualifier): counts = self._data.get_dtype_counts() dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())] - lines.append("dtypes: {types}".format(types=", ".join(dtypes))) + lines.append(f"dtypes: {', '.join(dtypes)}") if memory_usage is None: memory_usage = get_option("display.memory_usage") @@ -2399,12 +2393,7 @@ def _sizeof_fmt(num, size_qualifier): if "object" in counts or self.index._is_memory_usage_qualified(): size_qualifier = "+" mem_usage = self.memory_usage(index=True, deep=deep).sum() - lines.append( - "memory usage: {mem}\n".format( - mem=_sizeof_fmt(mem_usage, size_qualifier) - ) - ) - + lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") fmt.buffer_put_lines(buf, lines) def memory_usage(self, index=True, deep=False): @@ -3069,8 +3058,8 @@ def query(self, expr, inplace=False, **kwargs): """ inplace = validate_bool_kwarg(inplace, "inplace") if not isinstance(expr, str): - msg = "expr must be a string to be evaluated, {0} given" - raise ValueError(msg.format(type(expr))) + msg = f"expr must be a string to be evaluated, {type(expr)} given" + raise ValueError(msg) kwargs["level"] = kwargs.pop("level", 0) + 1 kwargs["target"] = None res = self.eval(expr, **kwargs) @@ -3287,11 +3276,7 @@ def select_dtypes(self, include=None, exclude=None): # can't both include AND exclude! if not include.isdisjoint(exclude): - raise ValueError( - "include and exclude overlap on {inc_ex}".format( - inc_ex=(include & exclude) - ) - ) + raise ValueError(f"include and exclude overlap on {(include & exclude)}") # We raise when both include and exclude are empty # Hence, we can just shrink the columns we want to keep @@ -4128,15 +4113,13 @@ def set_index( try: found = col in self.columns except TypeError: - raise TypeError( - err_msg + " Received column of type {}".format(type(col)) - ) + raise TypeError(err_msg + f" Received column of type {type(col)}") else: if not found: missing.append(col) if missing: - raise KeyError("None of {} are in the columns".format(missing)) + raise KeyError(f"None of {missing} are in the columns") if inplace: frame = self @@ -4180,17 +4163,15 @@ def set_index( # check newest element against length of calling frame, since # ensure_index_from_sequences would not raise for append=False. raise ValueError( - "Length mismatch: Expected {len_self} rows, " - "received array of length {len_col}".format( - len_self=len(self), len_col=len(arrays[-1]) - ) + f"Length mismatch: Expected {len(self)} rows, " + f"received array of length {len(arrays[-1])}" ) index = ensure_index_from_sequences(arrays, names) if verify_integrity and not index.is_unique: duplicates = index[index.duplicated()].unique() - raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates)) + raise ValueError(f"Index has duplicate keys: {duplicates}") # use set to handle duplicate column names gracefully in case of drop for c in set(to_remove): @@ -4205,8 +4186,13 @@ def set_index( return frame def reset_index( - self, level=None, drop=False, inplace=False, col_level=0, col_fill="" - ): + self, + level: Union[Hashable, Tuple, List, None] = None, + drop: bool = False, + inplace: bool = False, + col_level: Hashable = 0, + col_fill: Union[Hashable, None] = "", + ) -> pd.DataFrame: """ Reset the index, or a level of it. @@ -4423,7 +4409,7 @@ def _maybe_casted_values(index, labels=None): raise ValueError( "col_fill=None is incompatible " "with incomplete column name " - "{}".format(name) + f"{name}" ) col_fill = col_name[0] @@ -4589,7 +4575,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): mask = count > 0 else: if how is not None: - raise ValueError("invalid how option: {h}".format(h=how)) + raise ValueError(f"invalid how option: {how}") else: raise TypeError("must specify how or thresh") @@ -4600,7 +4586,12 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): else: return result - def drop_duplicates(self, subset=None, keep="first", inplace=False): + def drop_duplicates( + self, + subset: Union[Sequence[Hashable], Hashable] = None, + keep: Union[str, bool] = "first", + inplace: bool = False, + ) -> pd.DataFrame: """ Return DataFrame with duplicate rows removed. @@ -4637,7 +4628,11 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): else: return self[-duplicated] - def duplicated(self, subset=None, keep="first"): + def duplicated( + self, + subset: Union[Sequence[Hashable], Hashable] = None, + keep: Union[str, bool] = "first", + ) -> pd.Series: """ Return boolean Series denoting duplicate rows. @@ -7208,7 +7203,7 @@ def corr(self, method="pearson", min_periods=1): raise ValueError( "method must be either 'pearson', " "'spearman', 'kendall', or a callable, " - "'{method}' was supplied".format(method=method) + f"'{method}' was supplied" ) return self._constructor(correl, index=idx, columns=cols) @@ -7399,9 +7394,9 @@ def c(x): else: raise ValueError( - "Invalid method {method} was passed, " + f"Invalid method {method} was passed, " "valid methods are: 'pearson', 'kendall', " - "'spearman', or callable".format(method=method) + "'spearman', or callable" ) if not drop: @@ -7531,8 +7526,7 @@ def _count_level(self, level, axis=0, numeric_only=False): if not isinstance(count_axis, ABCMultiIndex): raise TypeError( - "Can only count levels on hierarchical " - "{ax}.".format(ax=self._get_axis_name(axis)) + "Can only count levels on hierarchical " f"{self._get_axis_name(axis)}." ) if frame._is_mixed_type: @@ -7590,8 +7584,8 @@ def _get_data(axis_matters): data = self._get_bool_data() else: # pragma: no cover msg = ( - "Generating numeric_only data with filter_type {f}" - "not supported.".format(f=filter_type) + f"Generating numeric_only data with filter_type {filter_type}" + "not supported." ) raise NotImplementedError(msg) return data @@ -8000,7 +7994,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True): elif axis == 1: new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how)) else: # pragma: no cover - raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis)) + raise AssertionError(f"Axis must be 0 or 1. Got {axis}") return self._constructor(new_data) @@ -8034,7 +8028,7 @@ def to_period(self, freq=None, axis=0, copy=True): elif axis == 1: new_data.set_axis(0, self.columns.to_period(freq=freq)) else: # pragma: no cover - raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis)) + raise AssertionError(f"Axis must be 0 or 1. Got {axis}") return self._constructor(new_data) @@ -8170,4 +8164,4 @@ def _from_nested_dict(data): def _put_str(s, space): - return "{s}".format(s=s)[:space].ljust(space) + return f"{s}"[:space].ljust(space) From c3fd3082ebaffb1f218943a2d69707bfcb238222 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 16:48:07 -0800 Subject: [PATCH 02/19] minor fix --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4690713ce574a..31e7963ffb12d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4192,7 +4192,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Union[Hashable, None] = "", - ) -> pd.DataFrame: + ) -> DataFrame: """ Reset the index, or a level of it. @@ -4591,7 +4591,7 @@ def drop_duplicates( subset: Union[Sequence[Hashable], Hashable] = None, keep: Union[str, bool] = "first", inplace: bool = False, - ) -> pd.DataFrame: + ) -> DataFrame: """ Return DataFrame with duplicate rows removed. @@ -8117,8 +8117,8 @@ def isin(self, values): else: if not is_list_like(values): raise TypeError( - f"only list-like or dict-like objects are allowed " - f"to be passed to DataFrame.isin(), " + "only list-like or dict-like objects are allowed " + "to be passed to DataFrame.isin(), " f"you passed a {repr(type(values).__name__)}" ) return DataFrame( From 3a4c2448982ee26b829479f77c1a161150ff0b70 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 19:37:51 -0800 Subject: [PATCH 03/19] cleaned up f strings, and flack 8 errors per PR comments --- pandas/core/frame.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 31e7963ffb12d..23e1006ce3802 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2332,7 +2332,7 @@ def _verbose_repr(): counts = self.count() if len(cols) != len(counts): # pragma: no cover raise AssertionError( - "Columns must equal counts " f"({len(cols)} != {len(counts)})" + f"Columns must equal counts ({len(cols)} != {len(counts)})" ) tmpl = "{count} non-null {dtype}" @@ -4113,7 +4113,11 @@ def set_index( try: found = col in self.columns except TypeError: - raise TypeError(err_msg + f" Received column of type {type(col)}") + raise TypeError( + 'The parameter "keys" may be a column key, one-dimensional ' + "array, or a list containing only valid column keys and " + f"one-dimensional arrays. Received column of type {type(col)}" + ) else: if not found: missing.append(col) @@ -4192,7 +4196,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Union[Hashable, None] = "", - ) -> DataFrame: + ) -> pandas.DataFrame: """ Reset the index, or a level of it. @@ -4591,7 +4595,7 @@ def drop_duplicates( subset: Union[Sequence[Hashable], Hashable] = None, keep: Union[str, bool] = "first", inplace: bool = False, - ) -> DataFrame: + ) -> pandas.DataFrame: """ Return DataFrame with duplicate rows removed. @@ -4632,7 +4636,7 @@ def duplicated( self, subset: Union[Sequence[Hashable], Hashable] = None, keep: Union[str, bool] = "first", - ) -> pd.Series: + ) -> pandas.Series: """ Return boolean Series denoting duplicate rows. From ef87c64db68700d32fcb8eb13814b1e57b4f2d15 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 20:04:31 -0800 Subject: [PATCH 04/19] fixed return annotation of functions that return a DataFrame --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 23e1006ce3802..75016cb9a2dcd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4196,7 +4196,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Union[Hashable, None] = "", - ) -> pandas.DataFrame: + ) -> "DataFrame": """ Reset the index, or a level of it. @@ -4595,7 +4595,7 @@ def drop_duplicates( subset: Union[Sequence[Hashable], Hashable] = None, keep: Union[str, bool] = "first", inplace: bool = False, - ) -> pandas.DataFrame: + ) -> "DataFrame": """ Return DataFrame with duplicate rows removed. From fde23a9222b8283bc23a673e504c21a9dc8c32ff Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 20:22:43 -0800 Subject: [PATCH 05/19] fixed annotation of functions that return a Series --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 75016cb9a2dcd..6979a1db95317 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4636,7 +4636,7 @@ def duplicated( self, subset: Union[Sequence[Hashable], Hashable] = None, keep: Union[str, bool] = "first", - ) -> pandas.Series: + ) -> "Series": """ Return boolean Series denoting duplicate rows. From cf3399858a178b0e6b3d049db1f54d23f2d478f4 Mon Sep 17 00:00:00 2001 From: mck619 Date: Tue, 3 Dec 2019 20:46:12 -0800 Subject: [PATCH 06/19] Update pandas/core/frame.py Co-Authored-By: William Ayd --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6979a1db95317..d16b96203d154 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4195,7 +4195,7 @@ def reset_index( drop: bool = False, inplace: bool = False, col_level: Hashable = 0, - col_fill: Union[Hashable, None] = "", + col_fill: Optional[Hashable] = "", ) -> "DataFrame": """ Reset the index, or a level of it. From d223c888717c215d033bef23cf3b4f63f6f3f490 Mon Sep 17 00:00:00 2001 From: mck619 Date: Tue, 3 Dec 2019 20:46:20 -0800 Subject: [PATCH 07/19] Update pandas/core/frame.py Co-Authored-By: William Ayd --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d16b96203d154..e24cc79b7c16c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4191,7 +4191,7 @@ def set_index( def reset_index( self, - level: Union[Hashable, Tuple, List, None] = None, + level: Optional[Union[Hashable, Sequence[Hashable]] = None, drop: bool = False, inplace: bool = False, col_level: Hashable = 0, From 188410c265753d4270bb2c0fa3acc60cb087e966 Mon Sep 17 00:00:00 2001 From: mck619 Date: Tue, 3 Dec 2019 20:46:29 -0800 Subject: [PATCH 08/19] Update pandas/core/frame.py Co-Authored-By: William Ayd --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e24cc79b7c16c..a1358d76283d2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4114,7 +4114,7 @@ def set_index( found = col in self.columns except TypeError: raise TypeError( - 'The parameter "keys" may be a column key, one-dimensional ' + f"{err_msg} Received column of type {type(col)}" "array, or a list containing only valid column keys and " f"one-dimensional arrays. Received column of type {type(col)}" ) From bfdf696691bd2326ded0c59f9c78220de2ccef14 Mon Sep 17 00:00:00 2001 From: mck619 Date: Tue, 3 Dec 2019 20:46:59 -0800 Subject: [PATCH 09/19] Update pandas/core/frame.py Co-Authored-By: William Ayd --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a1358d76283d2..564231a247606 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4592,7 +4592,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): def drop_duplicates( self, - subset: Union[Sequence[Hashable], Hashable] = None, + subset: Optional[Union[Hashable, Sequence[Hashable]] = None, keep: Union[str, bool] = "first", inplace: bool = False, ) -> "DataFrame": From 0ecb0008f727205daf4a12b014da8e1e1c740a98 Mon Sep 17 00:00:00 2001 From: mck619 Date: Tue, 3 Dec 2019 20:47:07 -0800 Subject: [PATCH 10/19] Update pandas/core/frame.py Co-Authored-By: William Ayd --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 564231a247606..4bbb2880a2676 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4634,7 +4634,7 @@ def drop_duplicates( def duplicated( self, - subset: Union[Sequence[Hashable], Hashable] = None, + subset: Optional[Union[Hashable, Sequence[Hashable]] = None, keep: Union[str, bool] = "first", ) -> "Series": """ From 7b52345db3005033a7ae1db062f5521d38b17e4b Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 21:04:58 -0800 Subject: [PATCH 11/19] typing syntax fix --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4bbb2880a2676..bd39f936a9e35 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4191,7 +4191,7 @@ def set_index( def reset_index( self, - level: Optional[Union[Hashable, Sequence[Hashable]] = None, + level: Optional[Union[Hashable, Sequence[Hashable]]] = None, drop: bool = False, inplace: bool = False, col_level: Hashable = 0, From 5e7d91557bb980781708e03ce093757d56fd3035 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Tue, 3 Dec 2019 21:06:41 -0800 Subject: [PATCH 12/19] more typing syntax fixes --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bd39f936a9e35..910eb330e663b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4114,7 +4114,7 @@ def set_index( found = col in self.columns except TypeError: raise TypeError( - f"{err_msg} Received column of type {type(col)}" + f"{err_msg} Received column of type {type(col)}" "array, or a list containing only valid column keys and " f"one-dimensional arrays. Received column of type {type(col)}" ) @@ -4592,7 +4592,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): def drop_duplicates( self, - subset: Optional[Union[Hashable, Sequence[Hashable]] = None, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, keep: Union[str, bool] = "first", inplace: bool = False, ) -> "DataFrame": @@ -4634,7 +4634,7 @@ def drop_duplicates( def duplicated( self, - subset: Optional[Union[Hashable, Sequence[Hashable]] = None, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, keep: Union[str, bool] = "first", ) -> "Series": """ From 70ef86095ffe1e210ce6eb5896229940b119d872 Mon Sep 17 00:00:00 2001 From: mck619 Date: Wed, 4 Dec 2019 09:23:08 -0800 Subject: [PATCH 13/19] Update pandas/core/frame.py Co-Authored-By: Simon Hawkins --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 910eb330e663b..368844b3be5fe 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1112,7 +1112,7 @@ def dot(self, other): rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: raise ValueError( - "Dot product shape mismatch, " f"{lvals.shape} vs {rvals.shape}" + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" ) if isinstance(other, DataFrame): From 997a2e33ed695493bc3d5f32537f245a73cc423c Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Wed, 4 Dec 2019 09:27:34 -0800 Subject: [PATCH 14/19] fixed fstring with err_msg --- pandas/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 910eb330e663b..3e6802ee2c72b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4114,9 +4114,7 @@ def set_index( found = col in self.columns except TypeError: raise TypeError( - f"{err_msg} Received column of type {type(col)}" - "array, or a list containing only valid column keys and " - f"one-dimensional arrays. Received column of type {type(col)}" + f"{err_msg}. Received column of type {type(col)}" ) else: if not found: From a00c34dc57977a1d28a6ff763f5980463758c34a Mon Sep 17 00:00:00 2001 From: mck619 Date: Wed, 4 Dec 2019 09:32:26 -0800 Subject: [PATCH 15/19] Update pandas/core/frame.py Co-Authored-By: Simon Hawkins --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 368844b3be5fe..6c3eff149f936 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7530,7 +7530,7 @@ def _count_level(self, level, axis=0, numeric_only=False): if not isinstance(count_axis, ABCMultiIndex): raise TypeError( - "Can only count levels on hierarchical " f"{self._get_axis_name(axis)}." + f"Can only count levels on hierarchical {self._get_axis_name(axis)}." ) if frame._is_mixed_type: From 099feb61e867b53568a72ccbbc8e8b5bb64a826d Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Wed, 4 Dec 2019 09:36:16 -0800 Subject: [PATCH 16/19] fstring clean up --- pandas/core/frame.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 298192661e3a9..47dc6483c1b6f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4410,8 +4410,7 @@ def _maybe_casted_values(index, labels=None): if len(col_name) not in (1, self.columns.nlevels): raise ValueError( "col_fill=None is incompatible " - "with incomplete column name " - f"{name}" + f"with incomplete column name {name}" ) col_fill = col_name[0] @@ -7586,7 +7585,7 @@ def _get_data(axis_matters): data = self._get_bool_data() else: # pragma: no cover msg = ( - f"Generating numeric_only data with filter_type {filter_type}" + f"Generating numeric_only data with filter_type {filter_type} " "not supported." ) raise NotImplementedError(msg) @@ -8166,4 +8165,4 @@ def _from_nested_dict(data): def _put_str(s, space): - return f"{s}"[:space].ljust(space) + return str(s)[:space].ljust(space) From 18fed327b296c3f3a2bab41fc6283a419aea6359 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Wed, 4 Dec 2019 09:52:09 -0800 Subject: [PATCH 17/19] black formatting --- pandas/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5457febe8b6f5..6afd64f64df24 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4113,9 +4113,7 @@ def set_index( try: found = col in self.columns except TypeError: - raise TypeError( - f"{err_msg}. Received column of type {type(col)}" - ) + raise TypeError(f"{err_msg}. Received column of type {type(col)}") else: if not found: missing.append(col) From 17444ec7e8256d7d8e11278a43e78465dc0beb23 Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Thu, 5 Dec 2019 15:42:09 -0800 Subject: [PATCH 18/19] mypy fixes per Simon's comments --- pandas/core/frame.py | 21 +++++++++++++++++---- pandas/core/reshape/merge.py | 5 ++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6afd64f64df24..871e2066ae6c1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,6 +15,7 @@ import sys from textwrap import dedent from typing import ( + Any, FrozenSet, Hashable, Iterable, @@ -25,6 +26,7 @@ Tuple, Type, Union, + cast, ) import warnings @@ -4192,7 +4194,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Optional[Hashable] = "", - ) -> "DataFrame": + ) -> Optional["DataFrame"]: """ Reset the index, or a level of it. @@ -4220,8 +4222,8 @@ def reset_index( Returns ------- - DataFrame - DataFrame with the new index. + DataFrame or None + DataFrame with the new index or None if ``inplace=True``. See Also -------- @@ -4386,6 +4388,7 @@ def _maybe_casted_values(index, labels=None): new_index = self.index.droplevel(level) if not drop: + to_insert: Iterable[Tuple[Any, Optional[Any]]] if isinstance(self.index, ABCMultiIndex): names = [ (n if n is not None else f"level_{i}") @@ -4425,6 +4428,8 @@ def _maybe_casted_values(index, labels=None): if not inplace: return new_obj + return None + # ---------------------------------------------------------------------- # Reindex-based selection methods @@ -4590,7 +4595,7 @@ def drop_duplicates( subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, keep: Union[str, bool] = "first", inplace: bool = False, - ) -> "DataFrame": + ) -> Optional["DataFrame"]: """ Return DataFrame with duplicate rows removed. @@ -4613,6 +4618,7 @@ def drop_duplicates( Returns ------- DataFrame + DataFrame with duplicates removed or None if ``inplace=True`` """ if self.empty: return self.copy() @@ -4627,6 +4633,8 @@ def drop_duplicates( else: return self[-duplicated] + return None + def duplicated( self, subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, @@ -4675,6 +4683,9 @@ def f(vals): ): subset = (subset,) + # needed for mypy since can't narrow types using np.iterable + subset = cast(Iterable, subset) + # Verify all columns in subset exist in the queried dataframe # Otherwise, raise a KeyError, same as if you try to __getitem__ with a # key that doesn't exist. @@ -6024,6 +6035,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": raise ValueError("columns must be unique") df = self.reset_index(drop=True) + # TODO: use overload to refine return type of reset_index + assert df is not None # needed for mypy result = df[column].explode() result = df.drop([column], axis=1).join(result) result.index = self.index.take(result.index) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d671fff568891..726a59ca8e008 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -126,7 +126,10 @@ def _groupby_and_merge( on = [on] if right.duplicated(by + on).any(): - right = right.drop_duplicates(by + on, keep="last") + _right = right.drop_duplicates(by + on, keep="last") + # TODO: use overload to refine return type of drop_duplicates + assert _right is not None # needed for mypy + right = _right rby = right.groupby(by, sort=False) except KeyError: rby = None From 1a9c6f0504dbaea65427214a89000e9febf0313f Mon Sep 17 00:00:00 2001 From: Michael Kakehashi Date: Thu, 5 Dec 2019 16:04:50 -0800 Subject: [PATCH 19/19] doc string fix --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 871e2066ae6c1..88967b13c89b5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4618,7 +4618,7 @@ def drop_duplicates( Returns ------- DataFrame - DataFrame with duplicates removed or None if ``inplace=True`` + DataFrame with duplicates removed or None if ``inplace=True``. """ if self.empty: return self.copy()