Skip to content

CLN: assorted cleanups #29406

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions pandas/_libs/sparse.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ cdef class BlockIndex(SparseIndex):

result = np.empty(other.npoints, dtype=np.float64)

for 0 <= i < other.nblocks:
for i in range(other.nblocks):
ocur = olocs[i]
ocurlen = olens[i]

Expand Down Expand Up @@ -746,9 +746,6 @@ cdef class BlockUnion(BlockMerge):

nend = xend[xi]

# print 'here xi=%d, yi=%d, mode=%d, nend=%d' % (self.xi, self.yi,
# mode, nend)

# done with y?
if yi == ynblocks:
self._set_current_indices(xi + 1, yi, mode)
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,10 +1124,6 @@ def _decide_output_index(self, output, labels):
output_keys = labels
else:
output_keys = sorted(output)
try:
output_keys.sort()
except TypeError:
pass

if isinstance(labels, MultiIndex):
output_keys = MultiIndex.from_tuples(output_keys, names=labels.names)
Expand Down
51 changes: 22 additions & 29 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,9 +1092,8 @@ def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray:

return self._get_cythonized_result(
"group_any_all",
self.grouper,
aggregate=True,
cython_dtype=np.uint8,
cython_dtype=np.dtype(np.uint8),
needs_values=True,
needs_mask=True,
pre_processing=objs_to_bool,
Expand Down Expand Up @@ -1305,7 +1304,7 @@ def size(self):
result = self.grouper.size()

if isinstance(self.obj, Series):
result.name = getattr(self.obj, "name", None)
result.name = self.obj.name
return result

@classmethod
Expand Down Expand Up @@ -1586,9 +1585,8 @@ def _fill(self, direction, limit=None):

return self._get_cythonized_result(
"group_fillna_indexer",
self.grouper,
needs_mask=True,
cython_dtype=np.int64,
cython_dtype=np.dtype(np.int64),
result_is_index=True,
direction=direction,
limit=limit,
Expand Down Expand Up @@ -1882,11 +1880,10 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
if is_scalar(q):
return self._get_cythonized_result(
"group_quantile",
self.grouper,
aggregate=True,
needs_values=True,
needs_mask=True,
cython_dtype=np.float64,
cython_dtype=np.dtype(np.float64),
pre_processing=pre_processor,
post_processing=post_processor,
q=q,
Expand All @@ -1896,11 +1893,10 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
results = [
self._get_cythonized_result(
"group_quantile",
self.grouper,
aggregate=True,
needs_values=True,
needs_mask=True,
cython_dtype=np.float64,
cython_dtype=np.dtype(np.float64),
pre_processing=pre_processor,
post_processing=post_processor,
q=qi,
Expand Down Expand Up @@ -2167,14 +2163,13 @@ def cummax(self, axis=0, **kwargs):

def _get_cythonized_result(
self,
how,
grouper,
aggregate=False,
cython_dtype=None,
needs_values=False,
needs_mask=False,
needs_ngroups=False,
result_is_index=False,
how: str,
cython_dtype: np.dtype,
aggregate: bool = False,
needs_values: bool = False,
needs_mask: bool = False,
needs_ngroups: bool = False,
result_is_index: bool = False,
pre_processing=None,
post_processing=None,
**kwargs
Expand All @@ -2185,13 +2180,11 @@ def _get_cythonized_result(
Parameters
----------
how : str, Cythonized function name to be called
grouper : Grouper object containing pertinent group info
cython_dtype : np.dtype
Type of the array that will be modified by the Cython call.
aggregate : bool, default False
Whether the result should be aggregated to match the number of
groups
cython_dtype : default None
Type of the array that will be modified by the Cython call. If
`None`, the type will be inferred from the values of each slice
needs_values : bool, default False
Whether the values should be a part of the Cython call
signature
Expand Down Expand Up @@ -2234,8 +2227,10 @@ def _get_cythonized_result(
"Cannot use 'pre_processing' without specifying 'needs_values'!"
)

grouper = self.grouper

labels, _, ngroups = grouper.group_info
output = collections.OrderedDict()
output = collections.OrderedDict() # type: dict
base_func = getattr(libgroupby, how)

for name, obj in self._iterate_slices():
Expand All @@ -2246,9 +2241,6 @@ def _get_cythonized_result(
else:
result_sz = len(values)

if not cython_dtype:
cython_dtype = values.dtype

result = np.zeros(result_sz, dtype=cython_dtype)
func = partial(base_func, result, labels)
inferences = None
Expand Down Expand Up @@ -2308,8 +2300,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):

return self._get_cythonized_result(
"group_shift_indexer",
self.grouper,
cython_dtype=np.int64,
cython_dtype=np.dtype(np.int64),
needs_ngroups=True,
result_is_index=True,
periods=periods,
Expand Down Expand Up @@ -2478,11 +2469,13 @@ def _reindex_output(self, output):


@Appender(GroupBy.__doc__)
def groupby(obj, by, **kwds):
def groupby(obj: NDFrame, by, **kwds):
if isinstance(obj, Series):
from pandas.core.groupby.generic import SeriesGroupBy

klass = SeriesGroupBy
klass = (
SeriesGroupBy
) # type: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Type[GroupBy] would have been sufficient.

elif isinstance(obj, DataFrame):
from pandas.core.groupby.generic import DataFrameGroupBy

Expand Down
16 changes: 5 additions & 11 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,13 +592,10 @@ def agg_series(self, obj, func):
return self._aggregate_series_pure_python(obj, func)

def _aggregate_series_fast(self, obj, func):
# At this point we have already checked that obj.index is not a MultiIndex
# and that obj is backed by an ndarray, not ExtensionArray
func = self._is_builtin_func(func)

# TODO: pre-empt this, also pre-empt get_result raising TypeError if we pass an EA
# for EAs backed by ndarray we may have a performant workaround
if obj.index._has_complex_internals:
raise TypeError("Incompatible index for Cython grouper")

group_index, _, ngroups = self.group_info

# avoids object / Series creation overhead
Expand Down Expand Up @@ -842,15 +839,12 @@ def __iter__(self):
def _get_sorted_data(self):
return self.data.take(self.sort_idx, axis=self.axis)

def _chop(self, sdata, slice_obj):
raise AbstractMethodError(self)

def apply(self, f):
def _chop(self, sdata, slice_obj: slice):
raise AbstractMethodError(self)


class SeriesSplitter(DataSplitter):
def _chop(self, sdata, slice_obj):
def _chop(self, sdata, slice_obj: slice):
return sdata._get_values(slice_obj)


Expand All @@ -862,7 +856,7 @@ def fast_apply(self, f, names):
sdata = self._get_sorted_data()
return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)

def _chop(self, sdata, slice_obj):
def _chop(self, sdata, slice_obj: slice):
if self.axis == 0:
return sdata.iloc[slice_obj]
else:
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4747,10 +4747,9 @@ def get_indexer_for(self, target, **kwargs):

def _maybe_promote(self, other):
# A hack, but it works
from pandas import DatetimeIndex

if self.inferred_type == "date" and isinstance(other, DatetimeIndex):
return DatetimeIndex(self), other
if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
return type(other)(self), other
elif self.inferred_type == "boolean":
if not is_object_dtype(self.dtype):
return self.astype("object"), other.astype("object")
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2179,7 +2179,9 @@ def drop(self, codes, level=None, errors="raise"):
mask = indexer == -1
if mask.any():
if errors != "ignore":
raise ValueError("codes %s not contained in axis" % codes[mask])
raise ValueError(
"codes {codes} not contained in axis".format(codes=codes[mask])
)
except Exception:
pass

Expand Down
1 change: 1 addition & 0 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def init_ndarray(values, index, columns, dtype=None, copy=False):
try:
values = values.astype(dtype)
except Exception as orig:
# e.g. ValueError when trying to cast object dtype to float64
raise ValueError(
"failed to cast to '{dtype}' (Exception "
"was: {orig})".format(dtype=dtype, orig=orig)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def get_result(self):
self, method="concat"
)

def _get_result_dim(self):
def _get_result_dim(self) -> int:
if self._is_series and self.axis == 1:
return 2
else:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1948,13 +1948,13 @@ def _get_join_keys(llab, rlab, shape, sort):
return _get_join_keys(llab, rlab, shape, sort)


def _should_fill(lname, rname):
def _should_fill(lname, rname) -> bool:
if not isinstance(lname, str) or not isinstance(rname, str):
return True
return lname == rname


def _any(x):
def _any(x) -> bool:
return x is not None and com.any_not_none(*x)


Expand Down
4 changes: 3 additions & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,9 @@ def _normalize(table, normalize, margins, margins_name="All"):
if (margins_name not in table.iloc[-1, :].name) | (
margins_name != table.iloc[:, -1].name
):
raise ValueError("{} not in pivoted DataFrame".format(margins_name))
raise ValueError(
"{mname} not in pivoted DataFrame".format(mname=margins_name)
)
column_margin = table.iloc[:-1, -1]
index_margin = table.iloc[-1, :-1]

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class _Unstacker:

def __init__(
self,
values,
values: np.ndarray,
index,
level=-1,
value_columns=None,
Expand Down Expand Up @@ -985,7 +985,7 @@ def get_empty_frame(data):
else:

# PY2 embedded unicode, gh-22084
def _make_col_name(prefix, prefix_sep, level):
def _make_col_name(prefix, prefix_sep, level) -> str:
fstr = "{prefix}{prefix_sep}{level}"
return fstr.format(prefix=prefix, prefix_sep=prefix_sep, level=level)

Expand Down