diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py
index 2145551833e90..55863e649078d 100644
--- a/pandas/core/aggregation.py
+++ b/pandas/core/aggregation.py
@@ -388,7 +388,6 @@ def validate_func_kwargs(
     >>> validate_func_kwargs({'one': 'min', 'two': 'max'})
     (['one', 'two'], ['min', 'max'])
     """
-    no_arg_message = "Must provide 'func' or named aggregation **kwargs."
     tuple_given_message = "func is expected but received {} in **kwargs."
     columns = list(kwargs)
     func = []
@@ -397,6 +396,7 @@ def validate_func_kwargs(
             raise TypeError(tuple_given_message.format(type(col_func).__name__))
         func.append(col_func)
     if not columns:
+        no_arg_message = "Must provide 'func' or named aggregation **kwargs."
         raise TypeError(no_arg_message)
     return columns, func
@@ -499,14 +499,14 @@ def transform_dict_like(
         try:
             results[name] = transform(colg, how, 0, *args, **kwargs)
         except Exception as err:
-            if (
-                str(err) == "Function did not transform"
-                or str(err) == "No transform functions were provided"
-            ):
+            if str(err) in {
+                "Function did not transform",
+                "No transform functions were provided",
+            }:
                 raise err
 
     # combine results
-    if len(results) == 0:
+    if not results:
         raise ValueError("Transform function failed")
     return concat(results, axis=1)
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 085ad5e6a0dcf..6553a67df47e2 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -150,12 +150,10 @@ def _ensure_data(
             from pandas import PeriodIndex
 
             values = PeriodIndex(values)._data
-            dtype = values.dtype
         elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype):
             from pandas import TimedeltaIndex
 
             values = TimedeltaIndex(values)._data
-            dtype = values.dtype
         else:
             # Datetime
             if values.ndim > 1 and is_datetime64_ns_dtype(values.dtype):
@@ -169,8 +167,7 @@ def _ensure_data(
             from pandas import DatetimeIndex
 
             values = DatetimeIndex(values)._data
-            dtype = values.dtype
-
+        dtype = values.dtype
         return values.asi8, dtype
 
     elif is_categorical_dtype(values.dtype) and (
@@ -875,10 +872,9 @@ def value_counts_arraylike(values, dropna: bool):
         keys, counts = f(values, dropna)
 
         mask = isna(values)
-        if not dropna and mask.any():
-            if not isna(keys).any():
-                keys = np.insert(keys, 0, np.NaN)
-                counts = np.insert(counts, 0, mask.sum())
+        if not dropna and mask.any() and not isna(keys).any():
+            keys = np.insert(keys, 0, np.NaN)
+            counts = np.insert(counts, 0, mask.sum())
 
     keys = _reconstruct_data(keys, original.dtype, original)
@@ -1741,9 +1737,8 @@ def take_nd(
         dtype, fill_value = arr.dtype, arr.dtype.type()
 
     flip_order = False
-    if arr.ndim == 2:
-        if arr.flags.f_contiguous:
-            flip_order = True
+    if arr.ndim == 2 and arr.flags.f_contiguous:
+        flip_order = True
 
     if flip_order:
         arr = arr.T
@@ -1915,8 +1910,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
         # and `value` is a pd.Timestamp, we may need to convert value
         arr = ensure_wrapped_if_datetimelike(arr)
 
-    result = arr.searchsorted(value, side=side, sorter=sorter)
-    return result
+    return arr.searchsorted(value, side=side, sorter=sorter)
 
 
 # ---- #
diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 76d723beac7e6..1cac825cc0898 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -49,8 +49,7 @@ def _check_comparison_types(
     if is_scalar(result) and isinstance(a, np.ndarray):
         type_names = [type(a).__name__, type(b).__name__]
 
-        if isinstance(a, np.ndarray):
-            type_names[0] = f"ndarray(dtype={a.dtype})"
f"ndarray(dtype={a.dtype})" raise TypeError( f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" diff --git a/pandas/core/base.py b/pandas/core/base.py index b603ba31f51dd..631f67ced77dd 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -321,10 +321,9 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): return f f = getattr(np, arg, None) - if f is not None: - if hasattr(self, "__array__"): - # in particular exclude Window - return f(self, *args, **kwargs) + if f is not None and hasattr(self, "__array__"): + # in particular exclude Window + return f(self, *args, **kwargs) raise AttributeError( f"'{arg}' is not a valid function for '{type(self).__name__}' object" @@ -1046,7 +1045,7 @@ def value_counts( 1.0 1 dtype: int64 """ - result = value_counts( + return value_counts( self, sort=sort, ascending=ascending, @@ -1054,7 +1053,6 @@ def value_counts( bins=bins, dropna=dropna, ) - return result def unique(self): values = self._values @@ -1317,8 +1315,7 @@ def drop_duplicates(self, keep="first"): duplicated = self.duplicated(keep=keep) # pandas\core\base.py:1507: error: Value of type "IndexOpsMixin" is not # indexable [index] - result = self[np.logical_not(duplicated)] # type: ignore[index] - return result + return self[~duplicated] # type: ignore[index] def duplicated(self, keep="first"): return duplicated(self._values, keep=keep) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 54a6f47ae1b38..2980547e23f24 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -343,8 +343,7 @@ def array( elif is_timedelta64_ns_dtype(dtype): return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) - result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) - return result + return PandasArray._from_sequence(data, dtype=dtype, copy=copy) def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayLike]: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ffc84ad94459a..0a1ea4041a10b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -770,7 +770,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: # and to_string on entire frame may be expensive d = self - if not (max_rows is None): # unlimited rows + if max_rows is not None: # unlimited rows # min of two, where one may be None d = d.iloc[: min(max_rows, len(d))] else: @@ -2029,10 +2029,10 @@ def to_records( np.asarray(self.iloc[:, i]) for i in range(len(self.columns)) ] - count = 0 index_names = list(self.index.names) if isinstance(self.index, MultiIndex): + count = 0 for i, n in enumerate(index_names): if n is None: index_names[i] = f"level_{count}" @@ -3334,7 +3334,7 @@ def _set_value(self, index, col, value, takeable: bool = False): takeable : interpret the index/col as indexers, default False """ try: - if takeable is True: + if takeable: series = self._ixs(col, axis=1) series._set_value(index, value, takeable=True) return @@ -5041,7 +5041,7 @@ class max type multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(to_insert))): - if not (level is None or i in level): + if level is not None and i not in level: continue name = names[i] if multi_col: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6a80fa3e93362..27b1751a5045e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -773,8 +773,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: """ labels = self._get_axis(axis) new_labels = 
         new_labels = labels.droplevel(level)
-        result = self.set_axis(new_labels, axis=axis, inplace=False)
-        return result
+        return self.set_axis(new_labels, axis=axis, inplace=False)
 
     def pop(self, item: Hashable) -> Union[Series, Any]:
         result = self[item]
@@ -1445,8 +1444,7 @@ def __invert__(self):
             return self
 
         new_data = self._mgr.apply(operator.invert)
-        result = self._constructor(new_data).__finalize__(self, method="__invert__")
-        return result
+        return self._constructor(new_data).__finalize__(self, method="__invert__")
 
     @final
     def __nonzero__(self):
@@ -2036,8 +2034,7 @@ def _repr_data_resource_(self):
             as_json = data.to_json(orient="table")
             as_json = cast(str, as_json)
-            payload = json.loads(as_json, object_pairs_hook=collections.OrderedDict)
-            return payload
+            return json.loads(as_json, object_pairs_hook=collections.OrderedDict)
 
     # ----------------------------------------------------------------------
     # I/O Methods
@@ -5342,11 +5339,11 @@ def sample(
                     "Replace has to be set to `True` when "
                     "upsampling the population `frac` > 1."
                 )
-        elif n is not None and frac is None and n % 1 != 0:
+        elif frac is None and n % 1 != 0:
             raise ValueError("Only integers accepted as `n` values")
         elif n is None and frac is not None:
             n = round(frac * axis_length)
-        elif n is not None and frac is not None:
+        elif frac is not None:
             raise ValueError("Please enter a value for `frac` OR `n`, not both")
 
         # Check for negative sizes
@@ -5467,15 +5464,13 @@ def __getattr__(self, name: str):
         # Note: obj.x will always call obj.__getattribute__('x') prior to
         # calling obj.__getattr__('x').
         if (
-            name in self._internal_names_set
-            or name in self._metadata
-            or name in self._accessors
+            name not in self._internal_names_set
+            and name not in self._metadata
+            and name not in self._accessors
+            and self._info_axis._can_hold_identifiers_and_holds_name(name)
         ):
-            return object.__getattribute__(self, name)
-        else:
-            if self._info_axis._can_hold_identifiers_and_holds_name(name):
-                return self[name]
-            return object.__getattribute__(self, name)
+            return self[name]
+        return object.__getattribute__(self, name)
 
     def __setattr__(self, name: str, value) -> None:
         """
@@ -5585,17 +5580,16 @@ def _is_mixed_type(self) -> bool_t:
     @final
     def _check_inplace_setting(self, value) -> bool_t:
         """ check whether we allow in-place setting with this type of value """
-        if self._is_mixed_type:
-            if not self._mgr.is_numeric_mixed_type:
+        if self._is_mixed_type and not self._mgr.is_numeric_mixed_type:
 
-                # allow an actual np.nan thru
-                if is_float(value) and np.isnan(value):
-                    return True
+            # allow an actual np.nan thru
+            if is_float(value) and np.isnan(value):
+                return True
 
-                raise TypeError(
-                    "Cannot do inplace boolean setting on "
-                    "mixed-types with a non np.nan value"
-                )
+            raise TypeError(
+                "Cannot do inplace boolean setting on "
+                "mixed-types with a non np.nan value"
+            )
 
         return True
@@ -6264,8 +6258,7 @@ def convert_dtypes(
                 )
                 for col_name, col in self.items()
             ]
-            result = concat(results, axis=1, copy=False)
-            return result
+            return concat(results, axis=1, copy=False)
 
     # ----------------------------------------------------------------------
     # Filling NA's
@@ -7443,9 +7436,13 @@ def clip(
             upper = None
 
         # GH 2747 (arguments were reversed)
-        if lower is not None and upper is not None:
-            if is_scalar(lower) and is_scalar(upper):
-                lower, upper = min(lower, upper), max(lower, upper)
+        if (
+            lower is not None
+            and upper is not None
+            and is_scalar(lower)
+            and is_scalar(upper)
+        ):
+            lower, upper = min(lower, upper), max(lower, upper)
 
         # fast-path for scalars
         if (lower is None or (is_scalar(lower) and is_number(lower))) and (
@@ -8234,10 +8231,9 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries:
         end_date = end = self.index[0] + offset
 
         # Tick-like, e.g. 3 weeks
-        if isinstance(offset, Tick):
-            if end_date in self.index:
-                end = self.index.searchsorted(end_date, side="left")
-                return self.iloc[:end]
+        if isinstance(offset, Tick) and end_date in self.index:
+            end = self.index.searchsorted(end_date, side="left")
+            return self.iloc[:end]
 
         return self.loc[:end]
@@ -8646,17 +8642,19 @@ def _align_frame(
 
         is_series = isinstance(self, ABCSeries)
 
-        if axis is None or axis == 0:
-            if not self.index.equals(other.index):
-                join_index, ilidx, iridx = self.index.join(
-                    other.index, how=join, level=level, return_indexers=True
-                )
+        if (axis is None or axis == 0) and not self.index.equals(other.index):
+            join_index, ilidx, iridx = self.index.join(
+                other.index, how=join, level=level, return_indexers=True
+            )
 
-        if axis is None or axis == 1:
-            if not is_series and not self.columns.equals(other.columns):
-                join_columns, clidx, cridx = self.columns.join(
-                    other.columns, how=join, level=level, return_indexers=True
-                )
+        if (
+            (axis is None or axis == 1)
+            and not is_series
+            and not self.columns.equals(other.columns)
+        ):
+            join_columns, clidx, cridx = self.columns.join(
+                other.columns, how=join, level=level, return_indexers=True
+            )
 
         if is_series:
             reindexers = {0: [join_index, ilidx]}
@@ -9526,9 +9524,8 @@ def truncate(
             before = to_datetime(before)
             after = to_datetime(after)
 
-        if before is not None and after is not None:
-            if before > after:
-                raise ValueError(f"Truncate: {after} must be after {before}")
+        if before is not None and after is not None and before > after:
+            raise ValueError(f"Truncate: {after} must be after {before}")
 
         if len(ax) > 1 and ax.is_monotonic_decreasing:
             before, after = after, before
diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
index 79479c6db8d9d..399953fc17c73 100644
--- a/pandas/core/indexers.py
+++ b/pandas/core/indexers.py
@@ -82,12 +82,11 @@ def is_scalar_indexer(indexer, ndim: int) -> bool:
     if ndim == 1 and is_integer(indexer):
         # GH37748: allow indexer to be an integer for Series
         return True
-    if isinstance(indexer, tuple):
-        if len(indexer) == ndim:
-            return all(
-                is_integer(x) or (isinstance(x, np.ndarray) and x.ndim == len(x) == 1)
-                for x in indexer
-            )
+    if isinstance(indexer, tuple) and len(indexer) == ndim:
+        return all(
+            is_integer(x) or (isinstance(x, np.ndarray) and x.ndim == len(x) == 1)
+            for x in indexer
+        )
     return False
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 90ba03b312e56..c98242cae23f3 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1001,10 +1001,7 @@ def _multi_take_opportunity(self, tup: Tuple) -> bool:
             return False
 
         # just too complicated
-        if any(com.is_bool_indexer(x) for x in tup):
-            return False
-
-        return True
+        return not any(com.is_bool_indexer(x) for x in tup)
 
     def _multi_take(self, tup: Tuple):
         """
@@ -1424,11 +1421,7 @@ def _is_scalar_access(self, key: Tuple) -> bool:
         if len(key) != self.ndim:
             return False
 
-        for k in key:
-            if not is_integer(k):
-                return False
-
-        return True
+        return all(is_integer(k) for k in key)
 
     def _validate_integer(self, key: int, axis: int) -> None:
         """
@@ -1551,12 +1544,11 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
 
         # if there is only one block/type, still have to take split path
         # unless the block is one-dimensional or it can hold the value
-        if not take_split_path and self.obj._mgr.blocks:
-            if self.ndim > 1:
-                # in case of dict, keys are indices
-                val = list(value.values()) if isinstance(value, dict) else value
-                blk = self.obj._mgr.blocks[0]
-                take_split_path = not blk._can_hold_element(val)
+        if not take_split_path and self.obj._mgr.blocks and self.ndim > 1:
+            # in case of dict, keys are indices
+            val = list(value.values()) if isinstance(value, dict) else value
+            blk = self.obj._mgr.blocks[0]
+            take_split_path = not blk._can_hold_element(val)
 
         # if we have any multi-indexes that have non-trivial slices
         # (not null slices) then we must take the split path, xref
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 1120416eebeb9..ddee68c08b540 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -147,7 +147,7 @@ def find_valid_index(values, how: str):
 
     if how == "first":
         idxpos = is_valid[::].argmax()
 
-    if how == "last":
+    elif how == "last":
         idxpos = len(values) - 1 - is_valid[::-1].argmax()
 
     chk_notna = is_valid[idxpos]
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index fb9b20bd43d7c..8d3363df0d132 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -149,10 +149,7 @@ def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
         # further we also want to preserve NaN when all elements
         # are NaN, unlike bottleneck/numpy which consider this
         # to be 0
-        if name in ["nansum", "nanprod"]:
-            return False
-
-        return True
+        return name not in ["nansum", "nanprod"]
 
     return False
@@ -184,14 +181,11 @@ def _get_fill_value(
         else:
             return -np.inf
     else:
-        if fill_value_typ is None:
-            return iNaT
+        if fill_value_typ == "+inf":
+            # need the max int here
+            return np.iinfo(np.int64).max
         else:
-            if fill_value_typ == "+inf":
-                # need the max int here
-                return np.iinfo(np.int64).max
-            else:
-                return iNaT
+            return iNaT
 
 
 def _maybe_get_mask(
@@ -433,8 +427,7 @@ def _na_for_min_count(
     else:
         result_shape = values.shape[:axis] + values.shape[axis + 1 :]
 
-    result = np.full(result_shape, fill_value, dtype=values.dtype)
-    return result
+    return np.full(result_shape, fill_value, dtype=values.dtype)
 
 
 def nanany(
@@ -1151,12 +1144,12 @@ def nanskew(
     if isinstance(result, np.ndarray):
         result = np.where(m2 == 0, 0, result)
         result[count < 3] = np.nan
-        return result
     else:
         result = 0 if m2 == 0 else result
         if count < 3:
             return np.nan
-        return result
+
+    return result
 
 
 @disallow("M8", "m8")
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index f6c1da723a1d9..bae0d69f6b782 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -181,8 +181,7 @@ def _convert_obj(self, obj):
         -------
         obj : converted object
         """
-        obj = obj._consolidate()
-        return obj
+        return obj._consolidate()
 
     def _get_binner_for_time(self):
         raise AbstractMethodError(self)
@@ -1070,17 +1069,16 @@ def _downsample(self, how, **kwargs):
             return obj
 
         # do we have a regular frequency
-        if ax.freq is not None or ax.inferred_freq is not None:
-
-            # pandas\core\resample.py:1037: error: "BaseGrouper" has no
-            # attribute "binlabels"  [attr-defined]
-            if (
-                len(self.grouper.binlabels) > len(ax)  # type: ignore[attr-defined]
-                and how is None
-            ):
+        # pandas\core\resample.py:1037: error: "BaseGrouper" has no
+        # attribute "binlabels"  [attr-defined]
+        if (
+            (ax.freq is not None or ax.inferred_freq is not None)
+            and len(self.grouper.binlabels) > len(ax)  # type: ignore[attr-defined]
+            and how is None
+        ):
-
-                # let's do an asfreq
-                return self.asfreq()
+
+            # let's do an asfreq
+            return self.asfreq()
 
         # we are downsampling
         # we want to call the actual grouper method here
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 3888194305d76..e1399f73128e3 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -950,7 +950,7 @@ def __setitem__(self, key, value):
             self._set_with_engine(key, value)
         except (KeyError, ValueError):
             values = self._values
-            if is_integer(key) and not self.index.inferred_type == "integer":
+            if is_integer(key) and self.index.inferred_type != "integer":
                 # positional setter
                 values[key] = value
             else:
@@ -1284,9 +1284,7 @@ def __repr__(self) -> str:
             max_rows=max_rows,
             length=show_dimensions,
         )
-        result = buf.getvalue()
-
-        return result
+        return buf.getvalue()
 
     def to_string(
         self,
@@ -1847,8 +1845,7 @@ def unique(self):
         ['b', 'a', 'c']
         Categories (3, object): ['a' < 'b' < 'c']
         """
-        result = super().unique()
-        return result
+        return super().unique()
 
     def drop_duplicates(self, keep="first", inplace=False) -> Optional["Series"]:
         """
@@ -2704,8 +2701,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
         result = func(this_vals, other_vals)
         name = ops.get_op_result_name(self, other)
-        ret = this._construct_result(result, name)
-        return ret
+        return this._construct_result(result, name)
 
     def _construct_result(
         self, result: Union[ArrayLike, Tuple[ArrayLike, ArrayLike]], name: Hashable
@@ -3757,9 +3753,7 @@ def explode(self, ignore_index: bool = False) -> Series:
         else:
             index = self.index.repeat(counts)
 
-        result = self._constructor(values, index=index, name=self.name)
-
-        return result
+        return self._constructor(values, index=index, name=self.name)
 
     def unstack(self, level=-1, fill_value=None):
         """
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 8869533be30fb..21b213d71cd54 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -465,9 +465,7 @@ def _ensure_key_mapped_multiindex(
         for level in range(index.nlevels)
     ]
 
-    labels = type(index).from_arrays(mapped)
-
-    return labels
+    return type(index).from_arrays(mapped)
 
 
 def ensure_key_mapped(values, key: Optional[Callable], levels=None):