Skip to content

Added f-strings and typing to frame.py #30021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Dec 6, 2019
Merged
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 48 additions & 55 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def __init__(
except (ValueError, TypeError) as e:
exc = TypeError(
"DataFrame constructor called with "
"incompatible data and dtype: {e}".format(e=e)
f"incompatible data and dtype: {e}"
)
raise exc from e

Expand Down Expand Up @@ -1112,8 +1112,7 @@ def dot(self, other):
rvals = np.asarray(other)
if lvals.shape[1] != rvals.shape[0]:
raise ValueError(
"Dot product shape mismatch, "
"{s} vs {r}".format(s=lvals.shape, r=rvals.shape)
f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
)

if isinstance(other, DataFrame):
Expand All @@ -1129,7 +1128,7 @@ def dot(self, other):
else:
return Series(result, index=left.index)
else: # pragma: no cover
raise TypeError("unsupported type: {oth}".format(oth=type(other)))
raise TypeError(f"unsupported type: {type(other)}")

def __matmul__(self, other):
"""
Expand Down Expand Up @@ -1417,7 +1416,7 @@ def to_dict(self, orient="dict", into=dict):
for t in self.itertuples(name=None)
)
else:
raise ValueError("orient '{o}' not understood".format(o=orient))
raise ValueError(f"orient '{orient}' not understood")

def to_gbq(
self,
Expand Down Expand Up @@ -1836,9 +1835,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
formats.append(dtype_mapping)
else:
element = "row" if i < index_len else "column"
msg = ("Invalid dtype {dtype} specified for {element} {name}").format(
dtype=dtype_mapping, element=element, name=name
)
msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
raise ValueError(msg)

return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats})
Expand Down Expand Up @@ -2307,7 +2304,7 @@ def info(
lines.append(self.index._summary())

if len(self.columns) == 0:
lines.append("Empty {name}".format(name=type(self).__name__))
lines.append(f"Empty {type(self).__name__}")
fmt.buffer_put_lines(buf, lines)
return

Expand Down Expand Up @@ -2335,10 +2332,7 @@ def _verbose_repr():
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
raise AssertionError(
"Columns must equal counts "
"({cols:d} != {counts:d})".format(
cols=len(cols), counts=len(counts)
)
f"Columns must equal counts ({len(cols)} != {len(counts)})"
)
tmpl = "{count} non-null {dtype}"

Expand Down Expand Up @@ -2382,7 +2376,7 @@ def _sizeof_fmt(num, size_qualifier):

counts = self._data.get_dtype_counts()
dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
lines.append("dtypes: {types}".format(types=", ".join(dtypes)))
lines.append(f"dtypes: {', '.join(dtypes)}")

if memory_usage is None:
memory_usage = get_option("display.memory_usage")
Expand All @@ -2399,12 +2393,7 @@ def _sizeof_fmt(num, size_qualifier):
if "object" in counts or self.index._is_memory_usage_qualified():
size_qualifier = "+"
mem_usage = self.memory_usage(index=True, deep=deep).sum()
lines.append(
"memory usage: {mem}\n".format(
mem=_sizeof_fmt(mem_usage, size_qualifier)
)
)

lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
fmt.buffer_put_lines(buf, lines)

def memory_usage(self, index=True, deep=False):
Expand Down Expand Up @@ -3069,8 +3058,8 @@ def query(self, expr, inplace=False, **kwargs):
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if not isinstance(expr, str):
msg = "expr must be a string to be evaluated, {0} given"
raise ValueError(msg.format(type(expr)))
msg = f"expr must be a string to be evaluated, {type(expr)} given"
raise ValueError(msg)
kwargs["level"] = kwargs.pop("level", 0) + 1
kwargs["target"] = None
res = self.eval(expr, **kwargs)
Expand Down Expand Up @@ -3287,11 +3276,7 @@ def select_dtypes(self, include=None, exclude=None):

# can't both include AND exclude!
if not include.isdisjoint(exclude):
raise ValueError(
"include and exclude overlap on {inc_ex}".format(
inc_ex=(include & exclude)
)
)
raise ValueError(f"include and exclude overlap on {(include & exclude)}")

# We raise when both include and exclude are empty
# Hence, we can just shrink the columns we want to keep
Expand Down Expand Up @@ -4128,15 +4113,13 @@ def set_index(
try:
found = col in self.columns
except TypeError:
raise TypeError(
err_msg + " Received column of type {}".format(type(col))
)
raise TypeError(f"{err_msg}. Received column of type {type(col)}")
else:
if not found:
missing.append(col)

if missing:
raise KeyError("None of {} are in the columns".format(missing))
raise KeyError(f"None of {missing} are in the columns")

if inplace:
frame = self
Expand Down Expand Up @@ -4180,17 +4163,15 @@ def set_index(
# check newest element against length of calling frame, since
# ensure_index_from_sequences would not raise for append=False.
raise ValueError(
"Length mismatch: Expected {len_self} rows, "
"received array of length {len_col}".format(
len_self=len(self), len_col=len(arrays[-1])
)
f"Length mismatch: Expected {len(self)} rows, "
f"received array of length {len(arrays[-1])}"
)

index = ensure_index_from_sequences(arrays, names)

if verify_integrity and not index.is_unique:
duplicates = index[index.duplicated()].unique()
raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates))
raise ValueError(f"Index has duplicate keys: {duplicates}")

# use set to handle duplicate column names gracefully in case of drop
for c in set(to_remove):
Expand All @@ -4205,8 +4186,13 @@ def set_index(
return frame

def reset_index(
self, level=None, drop=False, inplace=False, col_level=0, col_fill=""
):
self,
level: Optional[Union[Hashable, Sequence[Hashable]]] = None,
drop: bool = False,
inplace: bool = False,
col_level: Hashable = 0,
col_fill: Optional[Hashable] = "",
) -> "DataFrame":
"""
Reset the index, or a level of it.

Expand Down Expand Up @@ -4422,8 +4408,7 @@ def _maybe_casted_values(index, labels=None):
if len(col_name) not in (1, self.columns.nlevels):
raise ValueError(
"col_fill=None is incompatible "
"with incomplete column name "
"{}".format(name)
f"with incomplete column name {name}"
)
col_fill = col_name[0]

Expand Down Expand Up @@ -4589,7 +4574,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
mask = count > 0
else:
if how is not None:
raise ValueError("invalid how option: {h}".format(h=how))
raise ValueError(f"invalid how option: {how}")
else:
raise TypeError("must specify how or thresh")

Expand All @@ -4600,7 +4585,12 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False):
else:
return result

def drop_duplicates(self, subset=None, keep="first", inplace=False):
def drop_duplicates(
self,
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is None a valid value in a sequence of labels?

Copy link
Contributor Author

@mck619 mck619 Dec 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is, check this out:

df = pd.DataFrame({None:[1,2,2], 'col1':[1,2,3]})
df.drop_duplicates(subset=[None])

outputs:

   NaN  col1
0    1     1
1    2     2

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
subset: Optional[Union[Hashable, Sequence[Optional[Hashable]]]] = None,

should probably be Sequence[Optional[Hashable]] in that case.

keep: Union[str, bool] = "first",
inplace: bool = False,
) -> "DataFrame":
"""
Return DataFrame with duplicate rows removed.

Expand Down Expand Up @@ -4637,7 +4627,11 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
else:
return self[-duplicated]

def duplicated(self, subset=None, keep="first"):
def duplicated(
self,
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
keep: Union[str, bool] = "first",
) -> "Series":
"""
Return boolean Series denoting duplicate rows.

Expand Down Expand Up @@ -7208,7 +7202,7 @@ def corr(self, method="pearson", min_periods=1):
raise ValueError(
"method must be either 'pearson', "
"'spearman', 'kendall', or a callable, "
"'{method}' was supplied".format(method=method)
f"'{method}' was supplied"
)

return self._constructor(correl, index=idx, columns=cols)
Expand Down Expand Up @@ -7399,9 +7393,9 @@ def c(x):

else:
raise ValueError(
"Invalid method {method} was passed, "
f"Invalid method {method} was passed, "
"valid methods are: 'pearson', 'kendall', "
"'spearman', or callable".format(method=method)
"'spearman', or callable"
)

if not drop:
Expand Down Expand Up @@ -7531,8 +7525,7 @@ def _count_level(self, level, axis=0, numeric_only=False):

if not isinstance(count_axis, ABCMultiIndex):
raise TypeError(
"Can only count levels on hierarchical "
"{ax}.".format(ax=self._get_axis_name(axis))
f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
)

if frame._is_mixed_type:
Expand Down Expand Up @@ -7590,8 +7583,8 @@ def _get_data(axis_matters):
data = self._get_bool_data()
else: # pragma: no cover
msg = (
"Generating numeric_only data with filter_type {f}"
"not supported.".format(f=filter_type)
f"Generating numeric_only data with filter_type {filter_type} "
"not supported."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this appears to have a missing space in the message. do we have a test for this message?

)
raise NotImplementedError(msg)
return data
Expand Down Expand Up @@ -8000,7 +7993,7 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True):
elif axis == 1:
new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
else: # pragma: no cover
raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

return self._constructor(new_data)

Expand Down Expand Up @@ -8034,7 +8027,7 @@ def to_period(self, freq=None, axis=0, copy=True):
elif axis == 1:
new_data.set_axis(0, self.columns.to_period(freq=freq))
else: # pragma: no cover
raise AssertionError("Axis must be 0 or 1. Got {ax!s}".format(ax=axis))
raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

return self._constructor(new_data)

Expand Down Expand Up @@ -8123,8 +8116,8 @@ def isin(self, values):
else:
if not is_list_like(values):
raise TypeError(
f"only list-like or dict-like objects are allowed "
f"to be passed to DataFrame.isin(), "
"only list-like or dict-like objects are allowed "
"to be passed to DataFrame.isin(), "
f"you passed a {repr(type(values).__name__)}"
)
return DataFrame(
Expand Down Expand Up @@ -8170,4 +8163,4 @@ def _from_nested_dict(data):


def _put_str(s, space):
return "{s}".format(s=s)[:space].ljust(space)
return str(s)[:space].ljust(space)