From 3f275aa2d7ac3529555b08a6716eb057bad80d12 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 14:16:40 +0000 Subject: [PATCH 01/22] refactor rest of core --- pandas/core/aggregation.py | 12 +-- pandas/core/algorithms.py | 30 +++--- pandas/core/apply.py | 7 +- pandas/core/array_algos/replace.py | 3 +- pandas/core/base.py | 17 ++-- pandas/core/common.py | 4 +- pandas/core/construction.py | 21 ++-- pandas/core/frame.py | 8 +- pandas/core/generic.py | 151 ++++++++++++++--------------- pandas/core/indexers.py | 21 ++-- pandas/core/indexing.py | 26 ++--- pandas/core/missing.py | 2 +- pandas/core/nanops.py | 27 ++---- pandas/core/resample.py | 15 +-- pandas/core/series.py | 60 +++++------- pandas/core/sorting.py | 6 +- 16 files changed, 188 insertions(+), 222 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index c64f0bd71cf84..0af855d5a504e 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -387,7 +387,6 @@ def validate_func_kwargs( >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) (['one', 'two'], ['min', 'max']) """ - no_arg_message = "Must provide 'func' or named aggregation **kwargs." tuple_given_message = "func is expected but received {} in **kwargs." columns = list(kwargs) func = [] @@ -396,6 +395,7 @@ def validate_func_kwargs( raise TypeError(tuple_given_message.format(type(col_func).__name__)) func.append(col_func) if not columns: + no_arg_message = "Must provide 'func' or named aggregation **kwargs." raise TypeError(no_arg_message) return columns, func @@ -497,14 +497,14 @@ def transform_dict_like( try: results[name] = transform(colg, how, 0, *args, **kwargs) except Exception as err: - if ( - str(err) == "Function did not transform" - or str(err) == "No transform functions were provided" - ): + if str(err) in [ + "Function did not transform", + "No transform functions were provided", + ]: raise err # combine results - if len(results) == 0: + if not results: raise ValueError("Transform function failed") return concat(results, axis=1) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e9e04ace784b6..4c3714edc85c1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -100,11 +100,13 @@ def _ensure_data( values = extract_array(values, extract_numpy=True) # we check some simple dtypes first - if is_object_dtype(dtype): - return ensure_object(np.asarray(values)), np.dtype("object") - elif is_object_dtype(values) and dtype is None: + if ( + is_object_dtype(dtype) + or not is_object_dtype(dtype) + and is_object_dtype(values) + and dtype is None + ): return ensure_object(np.asarray(values)), np.dtype("object") - try: if is_bool_dtype(values) or is_bool_dtype(dtype): # we are actually coercing to uint64 @@ -137,12 +139,10 @@ def _ensure_data( from pandas import PeriodIndex values = PeriodIndex(values) - dtype = values.dtype elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype): from pandas import TimedeltaIndex values = TimedeltaIndex(values) - dtype = values.dtype else: # Datetime if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype): @@ -156,8 +156,7 @@ def _ensure_data( from pandas import DatetimeIndex values = DatetimeIndex(values) - dtype = values.dtype - + dtype = values.dtype return values.asi8, dtype elif is_categorical_dtype(vals_dtype) and ( @@ -821,10 +820,9 @@ def value_counts_arraylike(values, dropna: bool): keys, counts = f(values, dropna) mask = isna(values) - if not dropna and mask.any(): - if not isna(keys).any(): - keys = np.insert(keys, 0, np.NaN) - counts = np.insert(counts, 0, mask.sum()) + if not dropna and mask.any() and not isna(keys).any(): + keys = np.insert(keys, 0, np.NaN) + counts = np.insert(counts, 0, mask.sum()) keys = _reconstruct_data(keys, original.dtype, original) @@ -1684,9 +1682,8 @@ def take_nd( dtype, fill_value = arr.dtype, arr.dtype.type() flip_order = False - if arr.ndim == 2: - if arr.flags.f_contiguous: - flip_order = True + if arr.ndim == 2 and arr.flags.f_contiguous: + flip_order = True if flip_order: arr = arr.T @@ -1861,8 +1858,7 @@ def searchsorted(arr, value, side="left", sorter=None): if isinstance(value, Timestamp) and value.tzinfo is None: value = value.to_datetime64() - result = arr.searchsorted(value, side=side, sorter=sorter) - return result + return arr.searchsorted(value, side=side, sorter=sorter) # ---- # diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 002e260742dc5..9909b2c0dc53a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -377,9 +377,10 @@ def wrap_results_for_axis( else: raise - if not isinstance(results[0], ABCSeries): - if len(result.index) == len(self.res_columns): - result.index = self.res_columns + if not isinstance(results[0], ABCSeries) and len(result.index) == len( + self.res_columns + ): + result.index = self.res_columns if len(result.columns) == len(res_index): result.columns = res_index diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 9eaa265adab2b..406ef60b2d9e7 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -47,8 +47,7 @@ def _check_comparison_types( if is_scalar(result) and isinstance(a, np.ndarray): type_names = [type(a).__name__, type(b).__name__] - if isinstance(a, np.ndarray): - type_names[0] = f"ndarray(dtype={a.dtype})" + type_names[0] = f"ndarray(dtype={a.dtype})" raise TypeError( f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" diff --git a/pandas/core/base.py b/pandas/core/base.py index c91e4db004f2a..cc45f34032527 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -284,10 +284,9 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): return f f = getattr(np, arg, None) - if f is not None: - if hasattr(self, "__array__"): - # in particular exclude Window - return f(self, *args, **kwargs) + if f is not None and hasattr(self, "__array__"): + # in particular exclude Window + return f(self, *args, **kwargs) raise AttributeError( f"'{arg}' is not a valid function for '{type(self).__name__}' object" @@ -570,8 +569,8 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): # TODO(GH-24345): Avoid potential double copy if copy or na_value is not lib.no_default: result = result.copy() - if na_value is not lib.no_default: - result[self.isna()] = na_value + if na_value is not lib.no_default: + result[self.isna()] = na_value return result @property @@ -974,7 +973,7 @@ def value_counts( NaN 1 dtype: int64 """ - result = value_counts( + return value_counts( self, sort=sort, ascending=ascending, @@ -982,7 +981,6 @@ def value_counts( bins=bins, dropna=dropna, ) - return result def unique(self): values = self._values @@ -1244,8 +1242,7 @@ def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: def drop_duplicates(self, keep="first"): duplicated = self.duplicated(keep=keep) - result = self[np.logical_not(duplicated)] - return result + return self[np.logical_not(duplicated)] def duplicated(self, keep="first"): return duplicated(self._values, keep=keep) diff --git a/pandas/core/common.py b/pandas/core/common.py index b860c83f89cbc..0f7228be28d3f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -128,8 +128,10 @@ def is_bool_indexer(key: Any) -> bool: key = np.asarray(key) if not lib.is_bool_array(key): - na_msg = "Cannot mask with non-boolean array containing NA / NaN values" if isna(key).any(): + na_msg = ( + "Cannot mask with non-boolean array containing NA / NaN values" + ) raise ValueError(na_msg) return False return True diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 7901e150a7ff4..0a702849e4c4e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -347,8 +347,7 @@ def array( elif is_timedelta64_ns_dtype(dtype): return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) - result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) - return result + return PandasArray._from_sequence(data, dtype=dtype, copy=copy) def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike: @@ -551,9 +550,13 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo Otherwise an object array is returned. """ # perf shortcut as this is the most common case - if isinstance(arr, np.ndarray): - if maybe_castable(arr) and not copy and dtype is None: - return arr + if ( + isinstance(arr, np.ndarray) + and maybe_castable(arr) + and not copy + and dtype is None + ): + return arr if isinstance(dtype, ExtensionDtype) and (dtype.kind != "M" or is_sparse(dtype)): # create an extension array from its dtype @@ -575,9 +578,11 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo # Take care in creating object arrays (but iterators are not # supported): - if is_object_dtype(dtype) and ( - is_list_like(subarr) - and not (is_iterator(subarr) or isinstance(subarr, np.ndarray)) + if ( + is_object_dtype(dtype) + and is_list_like(subarr) + and not is_iterator(subarr) + and not isinstance(subarr, np.ndarray) ): subarr = construct_1d_object_array_from_listlike(subarr) elif not is_extension_array_dtype(subarr): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5134529d9c21f..02cfd057169ea 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -701,7 +701,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: # and to_string on entire frame may be expensive d = self - if not (max_rows is None): # unlimited rows + if max_rows is not None: # unlimited rows # min of two, where one may be None d = d.iloc[: min(max_rows, len(d))] else: @@ -1934,10 +1934,10 @@ def to_records( np.asarray(self.iloc[:, i]) for i in range(len(self.columns)) ] - count = 0 index_names = list(self.index.names) if isinstance(self.index, MultiIndex): + count = 0 for i, n in enumerate(index_names): if n is None: index_names[i] = f"level_{count}" @@ -3179,7 +3179,7 @@ def _set_value(self, index, col, value, takeable: bool = False): takeable : interpret the index/col as indexers, default False """ try: - if takeable is True: + if takeable: series = self._ixs(col, axis=1) series._set_value(index, value, takeable=True) return @@ -4913,7 +4913,7 @@ class max type multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(to_insert))): - if not (level is None or i in level): + if level is not None and i not in level: continue name = names[i] if multi_col: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c90ab9cceea8c..faaee91a8a707 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -501,7 +501,7 @@ def _get_block_manager_axis(cls, axis: Axis) -> int: def _get_axis_resolvers(self, axis: str) -> Dict[str, Union[Series, MultiIndex]]: # index or columns axis_index = getattr(self, axis) - d = dict() + d = {} prefix = axis[0] for i, name in enumerate(axis_index.names): @@ -770,8 +770,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: """ labels = self._get_axis(axis) new_labels = labels.droplevel(level) - result = self.set_axis(new_labels, axis=axis, inplace=False) - return result + return self.set_axis(new_labels, axis=axis, inplace=False) def pop(self, item: Label) -> Union[Series, Any]: result = self[item] @@ -1435,8 +1434,7 @@ def __invert__(self): return self new_data = self._mgr.apply(operator.invert) - result = self._constructor(new_data).__finalize__(self, method="__invert__") - return result + return self._constructor(new_data).__finalize__(self, method="__invert__") def __nonzero__(self): raise ValueError( @@ -2006,8 +2004,7 @@ def _repr_data_resource_(self): as_json = data.to_json(orient="table") as_json = cast(str, as_json) - payload = json.loads(as_json, object_pairs_hook=collections.OrderedDict) - return payload + return json.loads(as_json, object_pairs_hook=collections.OrderedDict) # ---------------------------------------------------------------------- # I/O Methods @@ -5265,11 +5262,11 @@ def sample( "Replace has to be set to `True` when " "upsampling the population `frac` > 1." ) - elif n is not None and frac is None and n % 1 != 0: + elif frac is None and n % 1 != 0: raise ValueError("Only integers accepted as `n` values") - elif n is None and frac is not None: + elif n is None: n = int(round(frac * axis_length)) - elif n is not None and frac is not None: + elif frac is not None: raise ValueError("Please enter a value for `frac` OR `n`, not both") # Check for negative sizes @@ -5383,15 +5380,13 @@ def __getattr__(self, name: str): # Note: obj.x will always call obj.__getattribute__('x') prior to # calling obj.__getattr__('x'). if ( - name in self._internal_names_set - or name in self._metadata - or name in self._accessors + name not in self._internal_names_set + and name not in self._metadata + and name not in self._accessors + and self._info_axis._can_hold_identifiers_and_holds_name(name) ): - return object.__getattribute__(self, name) - else: - if self._info_axis._can_hold_identifiers_and_holds_name(name): - return self[name] - return object.__getattribute__(self, name) + return self[name] + return object.__getattribute__(self, name) def __setattr__(self, name: str, value) -> None: """ @@ -5410,19 +5405,15 @@ def __setattr__(self, name: str, value) -> None: # if this fails, go on to more involved attribute setting # (note that this matches __getattr__, above). - if name in self._internal_names_set: - object.__setattr__(self, name, value) - elif name in self._metadata: + if name in self._internal_names_set or name in self._metadata: object.__setattr__(self, name, value) else: try: existing = getattr(self, name) - if isinstance(existing, Index): + if isinstance(existing, Index) or name not in self._info_axis: object.__setattr__(self, name, value) - elif name in self._info_axis: - self[name] = value else: - object.__setattr__(self, name, value) + self[name] = value except (AttributeError, TypeError): if isinstance(self, ABCDataFrame) and (is_list_like(value)): warnings.warn( @@ -5493,17 +5484,16 @@ def _is_mixed_type(self) -> bool_t: def _check_inplace_setting(self, value) -> bool_t: """ check whether we allow in-place setting with this type of value """ - if self._is_mixed_type: - if not self._mgr.is_numeric_mixed_type: + if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: - # allow an actual np.nan thru - if is_float(value) and np.isnan(value): - return True + # allow an actual np.nan thru + if is_float(value) and np.isnan(value): + return True - raise TypeError( - "Cannot do inplace boolean setting on " - "mixed-types with a non np.nan value" - ) + raise TypeError( + "Cannot do inplace boolean setting on " + "mixed-types with a non np.nan value" + ) return True @@ -6147,8 +6137,7 @@ def convert_dtypes( ) for col_name, col in self.items() ] - result = pd.concat(results, axis=1, copy=False) - return result + return pd.concat(results, axis=1, copy=False) # ---------------------------------------------------------------------- # Filling NA's @@ -6295,9 +6284,7 @@ def fillna( ) value = value.reindex(self.index, copy=False) value = value._values - elif not is_list_like(value): - pass - else: + elif is_list_like(value): raise TypeError( '"value" parameter must be a scalar, dict ' "or Series, but you passed a " @@ -7584,9 +7571,13 @@ def clip( upper = None # GH 2747 (arguments were reversed) - if lower is not None and upper is not None: - if is_scalar(lower) and is_scalar(upper): - lower, upper = min(lower, upper), max(lower, upper) + if ( + lower is not None + and upper is not None + and is_scalar(lower) + and is_scalar(upper) + ): + lower, upper = min(lower, upper), max(lower, upper) # fast-path for scalars if (lower is None or (is_scalar(lower) and is_number(lower))) and ( @@ -8323,10 +8314,9 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks - if isinstance(offset, Tick): - if end_date in self.index: - end = self.index.searchsorted(end_date, side="left") - return self.iloc[:end] + if isinstance(offset, Tick) and end_date in self.index: + end = self.index.searchsorted(end_date, side="left") + return self.iloc[:end] return self.loc[:end] @@ -8732,17 +8722,19 @@ def _align_frame( is_series = isinstance(self, ABCSeries) - if axis is None or axis == 0: - if not self.index.equals(other.index): - join_index, ilidx, iridx = self.index.join( - other.index, how=join, level=level, return_indexers=True - ) + if (axis is None or axis == 0) and not self.index.equals(other.index): + join_index, ilidx, iridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) - if axis is None or axis == 1: - if not is_series and not self.columns.equals(other.columns): - join_columns, clidx, cridx = self.columns.join( - other.columns, how=join, level=level, return_indexers=True - ) + if ( + (axis is None or axis == 1) + and not is_series + and not self.columns.equals(other.columns) + ): + join_columns, clidx, cridx = self.columns.join( + other.columns, how=join, level=level, return_indexers=True + ) if is_series: reindexers = {0: [join_index, ilidx]} @@ -8767,15 +8759,17 @@ def _align_frame( right = right.fillna(method=method, axis=fill_axis, limit=limit) # if DatetimeIndex have different tz, convert to UTC - if is_datetime64tz_dtype(left.index.dtype): - if left.index.tz != right.index.tz: - if join_index is not None: - # GH#33671 ensure we don't change the index on - # our original Series (NB: by default deep=False) - left = left.copy() - right = right.copy() - left.index = join_index - right.index = join_index + if ( + is_datetime64tz_dtype(left.index.dtype) + and left.index.tz != right.index.tz + and join_index is not None + ): + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index return ( left.__finalize__(self), @@ -8857,16 +8851,18 @@ def _align_series( right = right.fillna(fill_value, method=method, limit=limit) # if DatetimeIndex have different tz, convert to UTC - if is_series or (not is_series and axis == 0): - if is_datetime64tz_dtype(left.index.dtype): - if left.index.tz != right.index.tz: - if join_index is not None: - # GH#33671 ensure we don't change the index on - # our original Series (NB: by default deep=False) - left = left.copy() - right = right.copy() - left.index = join_index - right.index = join_index + if ( + (is_series or axis == 0) + and is_datetime64tz_dtype(left.index.dtype) + and left.index.tz != right.index.tz + and join_index is not None + ): + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index return ( left.__finalize__(self), @@ -9568,9 +9564,8 @@ def truncate( before = to_datetime(before) after = to_datetime(after) - if before is not None and after is not None: - if before > after: - raise ValueError(f"Truncate: {after} must be after {before}") + if before is not None and after is not None and before > after: + raise ValueError(f"Truncate: {after} must be after {before}") if len(ax) > 1 and ax.is_monotonic_decreasing: before, after = after, before diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index e48a42599a2a0..02691bc56aee5 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -79,12 +79,11 @@ def is_scalar_indexer(indexer, ndim: int) -> bool: ------- bool """ - if isinstance(indexer, tuple): - if len(indexer) == ndim: - return all( - is_integer(x) or (isinstance(x, np.ndarray) and x.ndim == len(x) == 1) - for x in indexer - ) + if isinstance(indexer, tuple) and len(indexer) == ndim: + return all( + is_integer(x) or (isinstance(x, np.ndarray) and x.ndim == len(x) == 1) + for x in indexer + ) return False @@ -457,12 +456,12 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: # indexers that are not array-like: integer, slice, Ellipsis, None) # In this context, tuples are not considered as array-like, as they have # a specific meaning in indexing (multi-dimensional indexing) - if is_list_like(indexer): - if isinstance(indexer, tuple): - return indexer - else: + if ( + is_list_like(indexer) + and isinstance(indexer, tuple) + or not is_list_like(indexer) + ): return indexer - # convert list-likes to array if not is_array_like(indexer): indexer = pd_array(indexer) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c2dad928845a7..32c9b451a3852 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -627,9 +627,7 @@ def _get_setitem_indexer(self, key): except TypeError as e: # invalid indexer type vs 'other' indexing errors - if "cannot do" in str(e): - raise - elif "unhashable type" in str(e): + if "cannot do" in str(e) or "unhashable type" in str(e): raise raise IndexingError(key) from e @@ -986,10 +984,7 @@ def _multi_take_opportunity(self, tup: Tuple) -> bool: return False # just too complicated - if any(com.is_bool_indexer(x) for x in tup): - return False - - return True + return not any(com.is_bool_indexer(x) for x in tup) def _multi_take(self, tup: Tuple): """ @@ -1417,11 +1412,7 @@ def _is_scalar_access(self, key: Tuple) -> bool: if len(key) != self.ndim: return False - for k in key: - if not is_integer(k): - return False - - return True + return all(is_integer(k) for k in key) def _validate_integer(self, key: int, axis: int) -> None: """ @@ -1544,12 +1535,11 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value - if not take_split_path and self.obj._mgr.blocks: - if self.ndim > 1: - # in case of dict, keys are indices - val = list(value.values()) if isinstance(value, dict) else value - blk = self.obj._mgr.blocks[0] - take_split_path = not blk._can_hold_element(val) + if not take_split_path and self.obj._mgr.blocks and self.ndim > 1: + # in case of dict, keys are indices + val = list(value.values()) if isinstance(value, dict) else value + blk = self.obj._mgr.blocks[0] + take_split_path = not blk._can_hold_element(val) # if we have any multi-indexes that have non-trivial slices # (not null slices) then we must take the split path, xref diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 52536583b9b0d..78d31e0ddf581 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -156,7 +156,7 @@ def find_valid_index(values, how: str): if how == "first": idxpos = is_valid[::].argmax() - if how == "last": + elif how == "last": idxpos = len(values) - 1 - is_valid[::-1].argmax() chk_notna = is_valid[idxpos] diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8e917bb770247..6f233a87dd295 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -150,10 +150,7 @@ def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: # further we also want to preserve NaN when all elements # are NaN, unlike bottleneck/numpy which consider this # to be 0 - if name in ["nansum", "nanprod"]: - return False - - return True + return name not in ["nansum", "nanprod"] return False @@ -185,14 +182,11 @@ def _get_fill_value( else: return -np.inf else: - if fill_value_typ is None: - return iNaT + if fill_value_typ is not None and fill_value_typ == "+inf": + # need the max int here + return np.iinfo(np.int64).max else: - if fill_value_typ == "+inf": - # need the max int here - return np.iinfo(np.int64).max - else: - return iNaT + return iNaT def _maybe_get_mask( @@ -395,15 +389,12 @@ def _na_for_min_count( if fill_value is NaT: fill_value = values.dtype.type("NaT", "ns") - if values.ndim == 1: - return fill_value - elif axis is None: + if values.ndim == 1 or axis is None: return fill_value else: result_shape = values.shape[:axis] + values.shape[axis + 1 :] - result = np.full(result_shape, fill_value, dtype=values.dtype) - return result + return np.full(result_shape, fill_value, dtype=values.dtype) def nanany( @@ -1139,12 +1130,12 @@ def nanskew( if isinstance(result, np.ndarray): result = np.where(m2 == 0, 0, result) result[count < 3] = np.nan - return result else: result = 0 if m2 == 0 else result if count < 3: return np.nan - return result + + return result @disallow("M8", "m8") diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 78d217c4688b6..f32ec008e7a70 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -176,8 +176,7 @@ def _convert_obj(self, obj): ------- obj : converted object """ - obj = obj._consolidate() - return obj + return obj._consolidate() def _get_binner_for_time(self): raise AbstractMethodError(self) @@ -1042,12 +1041,14 @@ def _downsample(self, how, **kwargs): return obj # do we have a regular frequency - if ax.freq is not None or ax.inferred_freq is not None: - - if len(self.grouper.binlabels) > len(ax) and how is None: + if ( + (ax.freq is not None or ax.inferred_freq is not None) + and len(self.grouper.binlabels) > len(ax) + and how is None + ): - # let's do an asfreq - return self.asfreq() + # let's do an asfreq + return self.asfreq() # we are downsampling # we want to call the actual grouper method here diff --git a/pandas/core/series.py b/pandas/core/series.py index e4a805a18bcdb..88e7ca8929b46 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -410,24 +410,25 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None: if not fastpath: labels = ensure_index(labels) - if labels._is_all_dates: - if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - try: - labels = DatetimeIndex(labels) - # need to set here because we changed the index - if fastpath: - self._mgr.set_axis(axis, labels) - warnings.warn( - "Automatically casting object-dtype Index of datetimes to " - "DatetimeIndex is deprecated and will be removed in a " - "future version. Explicitly cast to DatetimeIndex instead.", - FutureWarning, - stacklevel=3, - ) - except (tslibs.OutOfBoundsDatetime, ValueError): - # labels may exceeds datetime bounds, - # or not be a DatetimeIndex - pass + if labels._is_all_dates and not isinstance( + labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex) + ): + try: + labels = DatetimeIndex(labels) + # need to set here because we changed the index + if fastpath: + self._mgr.set_axis(axis, labels) + warnings.warn( + "Automatically casting object-dtype Index of datetimes to " + "DatetimeIndex is deprecated and will be removed in a " + "future version. Explicitly cast to DatetimeIndex instead.", + FutureWarning, + stacklevel=3, + ) + except (tslibs.OutOfBoundsDatetime, ValueError): + # labels may exceeds datetime bounds, + # or not be a DatetimeIndex + pass object.__setattr__(self, "_index", labels) if not fastpath: @@ -1011,7 +1012,7 @@ def __setitem__(self, key, value): self._set_with_engine(key, value) except (KeyError, ValueError): values = self._values - if is_integer(key) and not self.index.inferred_type == "integer": + if is_integer(key) and self.index.inferred_type != "integer": # positional setter values[key] = value else: @@ -1345,9 +1346,7 @@ def __repr__(self) -> str: max_rows=max_rows, length=show_dimensions, ) - result = buf.getvalue() - - return result + return buf.getvalue() def to_string( self, @@ -1914,8 +1913,7 @@ def unique(self): ['b', 'a', 'c'] Categories (3, object): ['a' < 'b' < 'c'] """ - result = super().unique() - return result + return super().unique() def drop_duplicates(self, keep="first", inplace=False) -> Optional["Series"]: """ @@ -2630,9 +2628,7 @@ def dot(self, other): return self._constructor( np.dot(lvals, rvals), index=other.columns ).__finalize__(self, method="dot") - elif isinstance(other, Series): - return np.dot(lvals, rvals) - elif isinstance(rvals, np.ndarray): + elif isinstance(other, Series) or isinstance(rvals, np.ndarray): return np.dot(lvals, rvals) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") @@ -2729,8 +2725,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): from pandas.core.reshape.concat import concat if isinstance(to_append, (list, tuple)): - to_concat = [self] - to_concat.extend(to_append) + to_concat = [self, *to_append] else: to_concat = [self, to_append] if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]): @@ -2773,8 +2768,7 @@ def _binop(self, other, func, level=None, fill_value=None): result = func(this_vals, other_vals) name = ops.get_op_result_name(self, other) - ret = this._construct_result(result, name) - return ret + return this._construct_result(result, name) def _construct_result( self, result: Union[ArrayLike, Tuple[ArrayLike, ArrayLike]], name: Label @@ -3825,9 +3819,7 @@ def explode(self, ignore_index: bool = False) -> "Series": else: index = self.index.repeat(counts) - result = self._constructor(values, index=index, name=self.name) - - return result + return self._constructor(values, index=index, name=self.name) def unstack(self, level=-1, fill_value=None): """ diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2e32a7572adc7..b2534a30662d4 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -217,7 +217,7 @@ def decons_group_index(comp_labels, shape): x = comp_labels for i in reversed(range(len(shape))): labels = (x - y) % (factor * shape[i]) // factor - np.putmask(labels, comp_labels < 0, -1) + np.putmask(labels, x < 0, -1) label_list.append(labels) y = labels * factor factor *= shape[i] @@ -462,9 +462,7 @@ def _ensure_key_mapped_multiindex( for level in range(index.nlevels) ] - labels = type(index).from_arrays(mapped) - - return labels + return type(index).from_arrays(mapped) def ensure_key_mapped(values, key: Optional[Callable], levels=None): From 9c3598b3cd1c644f884a4866c898f2a93caee2fe Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Nov 2020 15:47:54 +0000 Subject: [PATCH 02/22] apply some suggestions --- pandas/core/nanops.py | 6 ++++-- pandas/core/sorting.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index fbce7b7300f2e..08dc0f9e614ca 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -182,7 +182,7 @@ def _get_fill_value( else: return -np.inf else: - if fill_value_typ is not None and fill_value_typ == "+inf": + if fill_value_typ == "+inf": # need the max int here return np.iinfo(np.int64).max else: @@ -386,7 +386,9 @@ def _na_for_min_count( if fill_value is NaT: fill_value = values.dtype.type("NaT", "ns") - if values.ndim == 1 or axis is None: + if values.ndim == 1: + return fill_value + elif axis is None: return fill_value else: result_shape = values.shape[:axis] + values.shape[axis + 1 :] diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 6abf8cf89d7b5..06a512cb6da11 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -218,7 +218,7 @@ def decons_group_index(comp_labels, shape): x = comp_labels for i in reversed(range(len(shape))): labels = (x - y) % (factor * shape[i]) // factor - np.putmask(labels, x < 0, -1) + labels[x < 0] = -1 label_list.append(labels) y = labels * factor factor *= shape[i] From 873d4303493c3940bd7fe7b59700333c54c88fe9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Nov 2020 15:48:29 +0000 Subject: [PATCH 03/22] logicalnot -> ~ --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5fbd712d72ba1..e6ee4b2d1da9c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1295,7 +1295,7 @@ def drop_duplicates(self, keep="first"): duplicated = self.duplicated(keep=keep) # pandas\core\base.py:1507: error: Value of type "IndexOpsMixin" is not # indexable [index] - return self[np.logical_not(duplicated)] # type: ignore[index] + return self[~duplicated] # type: ignore[index] def duplicated(self, keep="first"): return duplicated(self._values, keep=keep) From 31d29af7046de3a900e69c79dea36259ac5d132a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Nov 2020 15:49:42 +0000 Subject: [PATCH 04/22] parens --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 64315007d6f10..154323ec785bd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -102,9 +102,9 @@ def _ensure_data( # we check some simple dtypes first if ( is_object_dtype(dtype) - or not is_object_dtype(dtype) + or (not is_object_dtype(dtype)) and is_object_dtype(values) - and dtype is None + and (dtype is None) ): return ensure_object(np.asarray(values)), np.dtype("object") try: From 7a51410a33c1de93a3724bfdcf8389d8a0b00db7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Nov 2020 15:49:54 +0000 Subject: [PATCH 05/22] parens --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 154323ec785bd..1a0d97a375b73 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -101,7 +101,7 @@ def _ensure_data( # we check some simple dtypes first if ( - is_object_dtype(dtype) + (is_object_dtype(dtype)) or (not is_object_dtype(dtype)) and is_object_dtype(values) and (dtype is None) From a31e89d1fa3c01ba506039ff37b955e6d0f2b7b9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 9 Nov 2020 15:50:04 +0000 Subject: [PATCH 06/22] parens --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1a0d97a375b73..0ad04fc5a15e2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -103,7 +103,7 @@ def _ensure_data( if ( (is_object_dtype(dtype)) or (not is_object_dtype(dtype)) - and is_object_dtype(values) + and (is_object_dtype(values)) and (dtype is None) ): return ensure_object(np.asarray(values)), np.dtype("object") From 5e053d44e9dbdade2ac04412e5d706bd1c12dc2d Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Mon, 23 Nov 2020 17:34:45 +0000 Subject: [PATCH 07/22] factor out _set_join_index --- pandas/core/generic.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index be630c757a333..33bb11ea8269e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8842,12 +8842,7 @@ def _align_frame( and left.index.tz != right.index.tz and join_index is not None ): - # GH#33671 ensure we don't change the index on - # our original Series (NB: by default deep=False) - left = left.copy() - right = right.copy() - left.index = join_index - right.index = join_index + left, right = _set_join_index(left, right, join_index) return ( left.__finalize__(self), @@ -8936,12 +8931,7 @@ def _align_series( and left.index.tz != right.index.tz and join_index is not None ): - # GH#33671 ensure we don't change the index on - # our original Series (NB: by default deep=False) - left = left.copy() - right = right.copy() - left.index = join_index - right.index = join_index + left, right = _set_join_index(left, right, join_index) return ( left.__finalize__(self), @@ -11355,6 +11345,18 @@ def last_valid_index(self): return self._find_valid_index("last") +def _set_join_index( + left: Series, right: Series, join_index: Index +) -> Tuple[Series, Series]: + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index + return left, right + + def _doc_parms(cls): """Return a tuple of the doc parms.""" axis_descr = ( From 5f0a27833acdd18a158406e3a2040481dbd8a684 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Mon, 23 Nov 2020 17:51:38 +0000 Subject: [PATCH 08/22] :label: type --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 33bb11ea8269e..b9cfd99370a8c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11346,8 +11346,8 @@ def last_valid_index(self): def _set_join_index( - left: Series, right: Series, join_index: Index -) -> Tuple[Series, Series]: + left: FrameOrSeries, right: FrameOrSeries, join_index: Index +) -> Tuple[FrameOrSeries, FrameOrSeries]: # GH#33671 ensure we don't change the index on # our original Series (NB: by default deep=False) left = left.copy() From 9383755c1d882ba130f052be5694ec43180a71cc Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 18:56:38 +0000 Subject: [PATCH 09/22] coverage, remove putmask --- pandas/core/indexing.py | 4 +++- pandas/core/series.py | 3 ++- pandas/core/sorting.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5296730099c0f..54c3e4597e21d 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -636,7 +636,9 @@ def _get_setitem_indexer(self, key): except TypeError as e: # invalid indexer type vs 'other' indexing errors - if "cannot do" in str(e) or "unhashable type" in str(e): + if "cannot do" in str(e): + raise + elif "unhashable type" in str(e): raise raise IndexingError(key) from e diff --git a/pandas/core/series.py b/pandas/core/series.py index a6f43312ed62a..40d129080244e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2657,7 +2657,8 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): from pandas.core.reshape.concat import concat if isinstance(to_append, (list, tuple)): - to_concat = [self, *to_append] + to_concat = [self] + to_concat.extend(to_append) else: to_concat = [self, to_append] if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]): diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index fdf02370cf4ed..81d04196aa166 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -218,7 +218,7 @@ def decons_group_index(comp_labels, shape): x = comp_labels for i in reversed(range(len(shape))): labels = (x - y) % (factor * shape[i]) // factor - labels[x < 0] = -1 + np.putmask(labels, comp_labels < 0, -1) label_list.append(labels) y = labels * factor factor *= shape[i] From 0d10a5bc4aa3ae99c4ee6a7892848ee34073a795 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 16 Dec 2020 15:11:34 +0000 Subject: [PATCH 10/22] pass dtype to maybe_castable directly --- pandas/core/construction.py | 2 +- pandas/core/dtypes/cast.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 1cd5d354c45e0..d6a9ac5a9cbc1 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -558,7 +558,7 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo # perf shortcut as this is the most common case if ( isinstance(arr, np.ndarray) - and maybe_castable(arr) + and maybe_castable(arr.dtype) and not copy and dtype is None ): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index abcc60a15c641..3c980105e45a2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1211,20 +1211,18 @@ def convert_dtypes( return inferred_dtype -def maybe_castable(arr: np.ndarray) -> bool: +def maybe_castable(dtype: Dtype) -> bool: # return False to force a non-fastpath - assert isinstance(arr, np.ndarray) # GH 37024 - # check datetime64[ns]/timedelta64[ns] are valid # otherwise try to coerce - kind = arr.dtype.kind + kind = dtype.kind if kind == "M": - return is_datetime64_ns_dtype(arr.dtype) + return is_datetime64_ns_dtype(dtype) elif kind == "m": - return is_timedelta64_ns_dtype(arr.dtype) + return is_timedelta64_ns_dtype(dtype) - return arr.dtype.name not in POSSIBLY_CAST_DTYPES + return dtype.name not in POSSIBLY_CAST_DTYPES def maybe_infer_to_datetimelike( From 938b2d5f37139cfc5bf1610ae250a90b9273b763 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 16 Dec 2020 15:12:23 +0000 Subject: [PATCH 11/22] revert sourcery's change of moving message closer to usage --- pandas/core/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 60f3ca168db14..622d903b03579 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -107,10 +107,8 @@ def is_bool_indexer(key: Any) -> bool: key = np.asarray(key) if not lib.is_bool_array(key): + na_msg = "Cannot mask with non-boolean array containing NA / NaN values" if lib.infer_dtype(key) == "boolean" and isna(key).any(): - na_msg = ( - "Cannot mask with non-boolean array containing NA / NaN values" - ) # Don't raise on e.g. ["A", "B", np.nan], see # test_loc_getitem_list_of_labels_categoricalindex_with_na raise ValueError(na_msg) From 7fda4e71c45988414f840cb98f98ca1a751127fb Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 16 Dec 2020 15:13:15 +0000 Subject: [PATCH 12/22] avoid duplicated check --- pandas/core/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5244952c9aeaa..defdb949b73a4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -610,8 +610,8 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): # TODO(GH-24345): Avoid potential double copy if copy or na_value is not lib.no_default: result = result.copy() - if na_value is not lib.no_default: - result[self.isna()] = na_value + if na_value is not lib.no_default: + result[self.isna()] = na_value return result @property From c36bf8daf30c400946f8f93fd53f0f1c7d69454e Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 16 Dec 2020 15:16:10 +0000 Subject: [PATCH 13/22] remove redundant check --- pandas/core/algorithms.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3259094d3258c..b13538b02104c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -110,12 +110,7 @@ def _ensure_data( values = extract_array(values, extract_numpy=True) # we check some simple dtypes first - if ( - (is_object_dtype(dtype)) - or (not is_object_dtype(dtype)) - and (is_object_dtype(values)) - and (dtype is None) - ): + if is_object_dtype(dtype) or (is_object_dtype(values) and (dtype is None)): return ensure_object(np.asarray(values)), np.dtype("object") try: if is_bool_dtype(values) or is_bool_dtype(dtype): From 1f782fd0bd27060503756c1f0687ac829f568002 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 09:13:22 +0000 Subject: [PATCH 14/22] Dtype -> DtypeObj --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4320c2381b297..07e1c48075bb2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1281,7 +1281,7 @@ def convert_dtypes( return inferred_dtype -def maybe_castable(dtype: Dtype) -> bool: +def maybe_castable(dtype: DtypeObj) -> bool: # return False to force a non-fastpath # check datetime64[ns]/timedelta64[ns] are valid From 3f799553bd9e3b681c095f128da9edeefecddfae Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 17 Jan 2021 11:56:30 +0000 Subject: [PATCH 15/22] remove redundant parens --- pandas/core/algorithms.py | 2 +- pandas/core/generic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 741876dbdfd33..848471009a58f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -114,7 +114,7 @@ def _ensure_data( values = extract_array(values, extract_numpy=True) # we check some simple dtypes first - if is_object_dtype(dtype) or (is_object_dtype(values) and (dtype is None)): + if is_object_dtype(dtype) or (is_object_dtype(values) and dtype is None): return ensure_object(np.asarray(values)), np.dtype("object") try: if is_bool_dtype(values) or is_bool_dtype(dtype): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e1f6c7da2dffc..9471683ca00bf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5343,7 +5343,7 @@ def sample( raise ValueError("Only integers accepted as `n` values") elif n is None and frac is not None: n = round(frac * axis_length) - elif n is not None and frac is not None: + elif frac is not None: raise ValueError("Please enter a value for `frac` OR `n`, not both") # Check for negative sizes From 988838325fc41b1c40d7bb2953f5385ebb10d00e Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 17 Jan 2021 11:59:14 +0000 Subject: [PATCH 16/22] use set to check for membership --- pandas/core/aggregation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 9ffe9a0568040..55863e649078d 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -499,10 +499,10 @@ def transform_dict_like( try: results[name] = transform(colg, how, 0, *args, **kwargs) except Exception as err: - if str(err) in [ + if str(err) in { "Function did not transform", "No transform functions were provided", - ]: + }: raise err # combine results From 5125e5fbde280100696924cf799f4fffaebfe6e1 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 Jan 2021 12:10:38 +0000 Subject: [PATCH 17/22] revert change in generic.py --- pandas/core/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 88e4adf7288c9..8abc81444e8f0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5494,10 +5494,12 @@ def __setattr__(self, name: str, value) -> None: else: try: existing = getattr(self, name) - if isinstance(existing, Index) or name not in self._info_axis: + if isinstance(existing, Index): object.__setattr__(self, name, value) - else: + elif name in self._info_axis: self[name] = value + else: + object.__setattr__(self, name, value) except (AttributeError, TypeError): if isinstance(self, ABCDataFrame) and (is_list_like(value)): warnings.warn( From d693f581e333b88c4aa629e5cc22a75ec90bfe18 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 Jan 2021 12:12:55 +0000 Subject: [PATCH 18/22] revert _set_join_index refactoring --- pandas/core/generic.py | 44 ++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8abc81444e8f0..d3d8c42b66b6b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8675,12 +8675,15 @@ def _align_frame( right = right.fillna(method=method, axis=fill_axis, limit=limit) # if DatetimeIndex have different tz, convert to UTC - if ( - is_datetime64tz_dtype(left.index.dtype) - and left.index.tz != right.index.tz - and join_index is not None - ): - left, right = _set_join_index(left, right, join_index) + if is_datetime64tz_dtype(left.index.dtype): + if left.index.tz != right.index.tz: + if join_index is not None: + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index return ( left.__finalize__(self), @@ -8763,13 +8766,16 @@ def _align_series( right = right.fillna(fill_value, method=method, limit=limit) # if DatetimeIndex have different tz, convert to UTC - if ( - (is_series or axis == 0) - and is_datetime64tz_dtype(left.index.dtype) - and left.index.tz != right.index.tz - and join_index is not None - ): - left, right = _set_join_index(left, right, join_index) + if is_series or (not is_series and axis == 0): + if is_datetime64tz_dtype(left.index.dtype): + if left.index.tz != right.index.tz: + if join_index is not None: + # GH#33671 ensure we don't change the index on + # our original Series (NB: by default deep=False) + left = left.copy() + right = right.copy() + left.index = join_index + right.index = join_index return ( left.__finalize__(self), @@ -11095,18 +11101,6 @@ def last_valid_index(self): return self._find_valid_index("last") -def _set_join_index( - left: FrameOrSeries, right: FrameOrSeries, join_index: Index -) -> Tuple[FrameOrSeries, FrameOrSeries]: - # GH#33671 ensure we don't change the index on - # our original Series (NB: by default deep=False) - left = left.copy() - right = right.copy() - left.index = join_index - right.index = join_index - return left, right - - def _doc_parms(cls): """Return a tuple of the doc params.""" From d4e3bd61aaba6779484f2453034cc5b79202d5a5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 Jan 2021 12:20:20 +0000 Subject: [PATCH 19/22] deduplicate from merge --- pandas/core/generic.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d3d8c42b66b6b..be765d2cc9101 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11101,10 +11101,6 @@ def last_valid_index(self): return self._find_valid_index("last") -def _doc_parms(cls): - """Return a tuple of the doc params.""" - - def _doc_params(cls): """Return a tuple of the doc params.""" axis_descr = ( From 5559352d466d1746a59f5a2e77658fb9123db535 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 Jan 2021 12:23:04 +0000 Subject: [PATCH 20/22] :art: --- pandas/core/algorithms.py | 1 + pandas/core/indexers.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 848471009a58f..f2899bafc83a2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -116,6 +116,7 @@ def _ensure_data( # we check some simple dtypes first if is_object_dtype(dtype) or (is_object_dtype(values) and dtype is None): return ensure_object(np.asarray(values)), np.dtype("object") + try: if is_bool_dtype(values) or is_bool_dtype(dtype): # we are actually coercing to uint64 diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 3157418c88822..d5a9068fc24df 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -486,6 +486,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: or not is_list_like(indexer) ): return indexer + # convert list-likes to array if not is_array_like(indexer): indexer = pd_array(indexer) From 4f14b81da002ca84131fc424dff16d54fe11de40 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 Jan 2021 16:51:26 +0000 Subject: [PATCH 21/22] some reversions --- pandas/core/algorithms.py | 4 +++- pandas/core/apply.py | 7 +++---- pandas/core/generic.py | 8 ++++++-- pandas/core/indexers.py | 9 ++++----- pandas/core/series.py | 4 +++- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f2899bafc83a2..6553a67df47e2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -114,7 +114,9 @@ def _ensure_data( values = extract_array(values, extract_numpy=True) # we check some simple dtypes first - if is_object_dtype(dtype) or (is_object_dtype(values) and dtype is None): + if is_object_dtype(dtype): + return ensure_object(np.asarray(values)), np.dtype("object") + elif is_object_dtype(values) and dtype is None: return ensure_object(np.asarray(values)), np.dtype("object") try: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 0dc0b9e40093d..f7c7220985138 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -487,10 +487,9 @@ def wrap_results_for_axis( else: raise - if not isinstance(results[0], ABCSeries) and len(result.index) == len( - self.res_columns - ): - result.index = self.res_columns + if not isinstance(results[0], ABCSeries): + if len(result.index) == len(self.res_columns): + result.index = self.res_columns if len(result.columns) == len(res_index): result.columns = res_index diff --git a/pandas/core/generic.py b/pandas/core/generic.py index be765d2cc9101..27b1751a5045e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5489,7 +5489,9 @@ def __setattr__(self, name: str, value) -> None: # if this fails, go on to more involved attribute setting # (note that this matches __getattr__, above). - if name in self._internal_names_set or name in self._metadata: + if name in self._internal_names_set: + object.__setattr__(self, name, value) + elif name in self._metadata: object.__setattr__(self, name, value) else: try: @@ -6405,7 +6407,9 @@ def fillna( ) value = value.reindex(self.index, copy=False) value = value._values - elif is_list_like(value): + elif not is_list_like(value): + pass + else: raise TypeError( '"value" parameter must be a scalar, dict ' "or Series, but you passed a " diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index d5a9068fc24df..399953fc17c73 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -480,11 +480,10 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: # indexers that are not array-like: integer, slice, Ellipsis, None) # In this context, tuples are not considered as array-like, as they have # a specific meaning in indexing (multi-dimensional indexing) - if ( - is_list_like(indexer) - and isinstance(indexer, tuple) - or not is_list_like(indexer) - ): + if is_list_like(indexer): + if isinstance(indexer, tuple): + return indexer + else: return indexer # convert list-likes to array diff --git a/pandas/core/series.py b/pandas/core/series.py index fb29c1b6408ac..e1399f73128e3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2558,7 +2558,9 @@ def dot(self, other): return self._constructor( np.dot(lvals, rvals), index=other.columns ).__finalize__(self, method="dot") - elif isinstance(other, Series) or isinstance(rvals, np.ndarray): + elif isinstance(other, Series): + return np.dot(lvals, rvals) + elif isinstance(rvals, np.ndarray): return np.dot(lvals, rvals) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") From c03d87640ff2468d1aa70c6c7c046a24660f43d8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 18 Jan 2021 17:00:02 +0000 Subject: [PATCH 22/22] revert maybe_castable --- pandas/core/construction.py | 10 +++------- pandas/core/dtypes/cast.py | 12 +++++++----- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5431be28775ee..2980547e23f24 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -583,13 +583,9 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo Otherwise an object array is returned. """ # perf shortcut as this is the most common case - if ( - isinstance(arr, np.ndarray) - and maybe_castable(arr.dtype) - and not copy - and dtype is None - ): - return arr + if isinstance(arr, np.ndarray): + if maybe_castable(arr) and not copy and dtype is None: + return arr if isinstance(dtype, ExtensionDtype) and (dtype.kind != "M" or is_sparse(dtype)): # create an extension array from its dtype diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e9b3238a74186..0941967ef6bee 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1301,18 +1301,20 @@ def convert_dtypes( return inferred_dtype -def maybe_castable(dtype: DtypeObj) -> bool: +def maybe_castable(arr: np.ndarray) -> bool: # return False to force a non-fastpath + assert isinstance(arr, np.ndarray) # GH 37024 + # check datetime64[ns]/timedelta64[ns] are valid # otherwise try to coerce - kind = dtype.kind + kind = arr.dtype.kind if kind == "M": - return is_datetime64_ns_dtype(dtype) + return is_datetime64_ns_dtype(arr.dtype) elif kind == "m": - return is_timedelta64_ns_dtype(dtype) + return is_timedelta64_ns_dtype(arr.dtype) - return dtype.name not in POSSIBLY_CAST_DTYPES + return arr.dtype.name not in POSSIBLY_CAST_DTYPES def maybe_infer_to_datetimelike(