Skip to content

Commit 27d0735

Browse files
jbrockmendel authored and Mateusz Górski committed
CLN: assorted cleanups (pandas-dev#29406)
1 parent 87e25fe commit 27d0735

File tree

11 files changed

+42 -58 lines changed

pandas/_libs/sparse.pyx

+1 -4
Original file line number | Diff line number | Diff line change
@@ -597,7 +597,7 @@ cdef class BlockIndex(SparseIndex):
597597

598598
result = np.empty(other.npoints, dtype=np.float64)
599599

600-
for 0 <= i < other.nblocks:
600+
for i in range(other.nblocks):
601601
ocur = olocs[i]
602602
ocurlen = olens[i]
603603

@@ -746,9 +746,6 @@ cdef class BlockUnion(BlockMerge):
746746

747747
nend = xend[xi]
748748

749-
# print 'here xi=%d, yi=%d, mode=%d, nend=%d' % (self.xi, self.yi,
750-
# mode, nend)
751-
752749
# done with y?
753750
if yi == ynblocks:
754751
self._set_current_indices(xi + 1, yi, mode)

pandas/core/groupby/generic.py

-4
Original file line number | Diff line number | Diff line change
@@ -1124,10 +1124,6 @@ def _decide_output_index(self, output, labels):
11241124
output_keys = labels
11251125
else:
11261126
output_keys = sorted(output)
1127-
try:
1128-
output_keys.sort()
1129-
except TypeError:
1130-
pass
11311127

11321128
if isinstance(labels, MultiIndex):
11331129
output_keys = MultiIndex.from_tuples(output_keys, names=labels.names)

pandas/core/groupby/groupby.py

+22 -29
Original file line number | Diff line number | Diff line change
@@ -1092,9 +1092,8 @@ def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray:
10921092

10931093
return self._get_cythonized_result(
10941094
"group_any_all",
1095-
self.grouper,
10961095
aggregate=True,
1097-
cython_dtype=np.uint8,
1096+
cython_dtype=np.dtype(np.uint8),
10981097
needs_values=True,
10991098
needs_mask=True,
11001099
pre_processing=objs_to_bool,
@@ -1305,7 +1304,7 @@ def size(self):
13051304
result = self.grouper.size()
13061305

13071306
if isinstance(self.obj, Series):
1308-
result.name = getattr(self.obj, "name", None)
1307+
result.name = self.obj.name
13091308
return result
13101309

13111310
@classmethod
@@ -1586,9 +1585,8 @@ def _fill(self, direction, limit=None):
15861585

15871586
return self._get_cythonized_result(
15881587
"group_fillna_indexer",
1589-
self.grouper,
15901588
needs_mask=True,
1591-
cython_dtype=np.int64,
1589+
cython_dtype=np.dtype(np.int64),
15921590
result_is_index=True,
15931591
direction=direction,
15941592
limit=limit,
@@ -1882,11 +1880,10 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
18821880
if is_scalar(q):
18831881
return self._get_cythonized_result(
18841882
"group_quantile",
1885-
self.grouper,
18861883
aggregate=True,
18871884
needs_values=True,
18881885
needs_mask=True,
1889-
cython_dtype=np.float64,
1886+
cython_dtype=np.dtype(np.float64),
18901887
pre_processing=pre_processor,
18911888
post_processing=post_processor,
18921889
q=q,
@@ -1896,11 +1893,10 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
18961893
results = [
18971894
self._get_cythonized_result(
18981895
"group_quantile",
1899-
self.grouper,
19001896
aggregate=True,
19011897
needs_values=True,
19021898
needs_mask=True,
1903-
cython_dtype=np.float64,
1899+
cython_dtype=np.dtype(np.float64),
19041900
pre_processing=pre_processor,
19051901
post_processing=post_processor,
19061902
q=qi,
@@ -2167,14 +2163,13 @@ def cummax(self, axis=0, **kwargs):
21672163

21682164
def _get_cythonized_result(
21692165
self,
2170-
how,
2171-
grouper,
2172-
aggregate=False,
2173-
cython_dtype=None,
2174-
needs_values=False,
2175-
needs_mask=False,
2176-
needs_ngroups=False,
2177-
result_is_index=False,
2166+
how: str,
2167+
cython_dtype: np.dtype,
2168+
aggregate: bool = False,
2169+
needs_values: bool = False,
2170+
needs_mask: bool = False,
2171+
needs_ngroups: bool = False,
2172+
result_is_index: bool = False,
21782173
pre_processing=None,
21792174
post_processing=None,
21802175
**kwargs
@@ -2185,13 +2180,11 @@ def _get_cythonized_result(
21852180
Parameters
21862181
----------
21872182
how : str, Cythonized function name to be called
2188-
grouper : Grouper object containing pertinent group info
2183+
cython_dtype : np.dtype
2184+
Type of the array that will be modified by the Cython call.
21892185
aggregate : bool, default False
21902186
Whether the result should be aggregated to match the number of
21912187
groups
2192-
cython_dtype : default None
2193-
Type of the array that will be modified by the Cython call. If
2194-
`None`, the type will be inferred from the values of each slice
21952188
needs_values : bool, default False
21962189
Whether the values should be a part of the Cython call
21972190
signature
@@ -2234,8 +2227,10 @@ def _get_cythonized_result(
22342227
"Cannot use 'pre_processing' without specifying 'needs_values'!"
22352228
)
22362229

2230+
grouper = self.grouper
2231+
22372232
labels, _, ngroups = grouper.group_info
2238-
output = collections.OrderedDict()
2233+
output = collections.OrderedDict() # type: dict
22392234
base_func = getattr(libgroupby, how)
22402235

22412236
for name, obj in self._iterate_slices():
@@ -2246,9 +2241,6 @@ def _get_cythonized_result(
22462241
else:
22472242
result_sz = len(values)
22482243

2249-
if not cython_dtype:
2250-
cython_dtype = values.dtype
2251-
22522244
result = np.zeros(result_sz, dtype=cython_dtype)
22532245
func = partial(base_func, result, labels)
22542246
inferences = None
@@ -2308,8 +2300,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
23082300

23092301
return self._get_cythonized_result(
23102302
"group_shift_indexer",
2311-
self.grouper,
2312-
cython_dtype=np.int64,
2303+
cython_dtype=np.dtype(np.int64),
23132304
needs_ngroups=True,
23142305
result_is_index=True,
23152306
periods=periods,
@@ -2478,11 +2469,13 @@ def _reindex_output(self, output):
24782469

24792470

24802471
@Appender(GroupBy.__doc__)
2481-
def groupby(obj, by, **kwds):
2472+
def groupby(obj: NDFrame, by, **kwds):
24822473
if isinstance(obj, Series):
24832474
from pandas.core.groupby.generic import SeriesGroupBy
24842475

2485-
klass = SeriesGroupBy
2476+
klass = (
2477+
SeriesGroupBy
2478+
) # type: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]]
24862479
elif isinstance(obj, DataFrame):
24872480
from pandas.core.groupby.generic import DataFrameGroupBy
24882481

pandas/core/groupby/ops.py

+5 -11
Original file line number | Diff line number | Diff line change
@@ -592,13 +592,10 @@ def agg_series(self, obj, func):
592592
return self._aggregate_series_pure_python(obj, func)
593593

594594
def _aggregate_series_fast(self, obj, func):
595+
# At this point we have already checked that obj.index is not a MultiIndex
596+
# and that obj is backed by an ndarray, not ExtensionArray
595597
func = self._is_builtin_func(func)
596598

597-
# TODO: pre-empt this, also pre-empt get_result raising TypError if we pass a EA
598-
# for EAs backed by ndarray we may have a performant workaround
599-
if obj.index._has_complex_internals:
600-
raise TypeError("Incompatible index for Cython grouper")
601-
602599
group_index, _, ngroups = self.group_info
603600

604601
# avoids object / Series creation overhead
@@ -842,15 +839,12 @@ def __iter__(self):
842839
def _get_sorted_data(self):
843840
return self.data.take(self.sort_idx, axis=self.axis)
844841

845-
def _chop(self, sdata, slice_obj):
846-
raise AbstractMethodError(self)
847-
848-
def apply(self, f):
842+
def _chop(self, sdata, slice_obj: slice):
849843
raise AbstractMethodError(self)
850844

851845

852846
class SeriesSplitter(DataSplitter):
853-
def _chop(self, sdata, slice_obj):
847+
def _chop(self, sdata, slice_obj: slice):
854848
return sdata._get_values(slice_obj)
855849

856850

@@ -862,7 +856,7 @@ def fast_apply(self, f, names):
862856
sdata = self._get_sorted_data()
863857
return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)
864858

865-
def _chop(self, sdata, slice_obj):
859+
def _chop(self, sdata, slice_obj: slice):
866860
if self.axis == 0:
867861
return sdata.iloc[slice_obj]
868862
else:

pandas/core/indexes/base.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -4747,10 +4747,9 @@ def get_indexer_for(self, target, **kwargs):
47474747

47484748
def _maybe_promote(self, other):
47494749
# A hack, but it works
4750-
from pandas import DatetimeIndex
47514750

4752-
if self.inferred_type == "date" and isinstance(other, DatetimeIndex):
4753-
return DatetimeIndex(self), other
4751+
if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
4752+
return type(other)(self), other
47544753
elif self.inferred_type == "boolean":
47554754
if not is_object_dtype(self.dtype):
47564755
return self.astype("object"), other.astype("object")

pandas/core/indexes/multi.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -2179,7 +2179,9 @@ def drop(self, codes, level=None, errors="raise"):
21792179
mask = indexer == -1
21802180
if mask.any():
21812181
if errors != "ignore":
2182-
raise ValueError("codes %s not contained in axis" % codes[mask])
2182+
raise ValueError(
2183+
"codes {codes} not contained in axis".format(codes=codes[mask])
2184+
)
21832185
except Exception:
21842186
pass
21852187

pandas/core/internals/construction.py

+1
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,7 @@ def init_ndarray(values, index, columns, dtype=None, copy=False):
167167
try:
168168
values = values.astype(dtype)
169169
except Exception as orig:
170+
# e.g. ValueError when trying to cast object dtype to float64
170171
raise ValueError(
171172
"failed to cast to '{dtype}' (Exception "
172173
"was: {orig})".format(dtype=dtype, orig=orig)

pandas/core/reshape/concat.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -478,7 +478,7 @@ def get_result(self):
478478
self, method="concat"
479479
)
480480

481-
def _get_result_dim(self):
481+
def _get_result_dim(self) -> int:
482482
if self._is_series and self.axis == 1:
483483
return 2
484484
else:

pandas/core/reshape/merge.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1948,13 +1948,13 @@ def _get_join_keys(llab, rlab, shape, sort):
19481948
return _get_join_keys(llab, rlab, shape, sort)
19491949

19501950

1951-
def _should_fill(lname, rname):
1951+
def _should_fill(lname, rname) -> bool:
19521952
if not isinstance(lname, str) or not isinstance(rname, str):
19531953
return True
19541954
return lname == rname
19551955

19561956

1957-
def _any(x):
1957+
def _any(x) -> bool:
19581958
return x is not None and com.any_not_none(*x)
19591959

19601960

pandas/core/reshape/pivot.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -620,7 +620,9 @@ def _normalize(table, normalize, margins, margins_name="All"):
620620
if (margins_name not in table.iloc[-1, :].name) | (
621621
margins_name != table.iloc[:, -1].name
622622
):
623-
raise ValueError("{} not in pivoted DataFrame".format(margins_name))
623+
raise ValueError(
624+
"{mname} not in pivoted DataFrame".format(mname=margins_name)
625+
)
624626
column_margin = table.iloc[:-1, -1]
625627
index_margin = table.iloc[-1, :-1]
626628

pandas/core/reshape/reshape.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ class _Unstacker:
8888

8989
def __init__(
9090
self,
91-
values,
91+
values: np.ndarray,
9292
index,
9393
level=-1,
9494
value_columns=None,
@@ -985,7 +985,7 @@ def get_empty_frame(data):
985985
else:
986986

987987
# PY2 embedded unicode, gh-22084
988-
def _make_col_name(prefix, prefix_sep, level):
988+
def _make_col_name(prefix, prefix_sep, level) -> str:
989989
fstr = "{prefix}{prefix_sep}{level}"
990990
return fstr.format(prefix=prefix, prefix_sep=prefix_sep, level=level)
991991

0 commit comments

Comments
 (0)