Skip to content

CLN: assorted #53086

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ Metadata

Other
^^^^^
- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are presnet (:issue:`52840`)
- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`)
- Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
- Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
- Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t:
def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
cdef:
Py_ssize_t i, xi, yi, N, K
bint minpv
int64_t minpv
float64_t[:, ::1] result
ndarray[uint8_t, ndim=2] mask
int64_t nobs = 0
Expand All @@ -357,7 +357,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
if minp is None:
minpv = 1
else:
minpv = <int>minp
minpv = <int64_t>minp

result = np.empty((K, K), dtype=np.float64)
mask = np.isfinite(mat).view(np.uint8)
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/index.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from pandas._typing import npt
from pandas import MultiIndex
from pandas.core.arrays import ExtensionArray

multiindex_nulls_shift: int

class IndexEngine:
over_size_threshold: bool
def __init__(self, values: np.ndarray) -> None: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,5 @@ class BlockValuesRefs:
referenced_blocks: list[weakref.ref]
def __init__(self, blk: SharedBlock | None = ...) -> None: ...
def add_reference(self, blk: SharedBlock) -> None: ...
def add_index_reference(self, index: object) -> None: ...
def add_index_reference(self, index: Index) -> None: ...
def has_reference(self) -> bool: ...
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@ cdef class BlockValuesRefs:

Parameters
----------
index: object
index : Index
The index that the new reference should point to.
"""
self.referenced_blocks.append(weakref.ref(index))
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2612,7 +2612,7 @@ def maybe_convert_objects(ndarray[object] objects,
return tdi._data._ndarray
seen.object_ = True

if seen.period_:
elif seen.period_:
if is_period_array(objects):
from pandas import PeriodIndex
pi = PeriodIndex(objects)
Expand All @@ -2621,7 +2621,7 @@ def maybe_convert_objects(ndarray[object] objects,
return pi._data
seen.object_ = True

if seen.interval_:
elif seen.interval_:
if is_interval_array(objects):
from pandas import IntervalIndex
ii = IntervalIndex(objects)
Expand All @@ -2631,7 +2631,7 @@ def maybe_convert_objects(ndarray[object] objects,

seen.object_ = True

if seen.nat_:
elif seen.nat_:
if not seen.object_ and not seen.numeric_ and not seen.bool_:
# all NaT, None, or nan (at least one NaT)
# see GH#49340 for discussion of desired behavior
Expand Down
4 changes: 1 addition & 3 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,9 +852,7 @@ def _constructor_sliced(self):


class SubclassedCategorical(Categorical):
@property
def _constructor(self):
return SubclassedCategorical
pass


def _make_skipna_wrapper(alternative, skipna_alternative=None):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1240,7 +1240,7 @@ def take(
if not is_array_like(arr):
arr = np.asarray(arr)

indices = np.asarray(indices, dtype=np.intp)
indices = ensure_platform_int(indices)

if allow_fill:
# Pandas style, -1 means NA
Expand Down
5 changes: 0 additions & 5 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1086,15 +1086,10 @@ def agg(self):
result = super().agg()
if result is None:
f = self.f
kwargs = self.kwargs

# string, list-like, and dict-like are entirely handled in super
assert callable(f)

# we can be called from an inner function which
# passes this meta-data
kwargs.pop("_level", None)

# try a regular apply, this evaluates lambdas
# row-by-row; however if the lambda is expected a Series
# expression, e.g.: lambda x: x-x.quantile(0.25)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
if values.dtype == object:
dtype, _ = infer_dtype_from(other)

if isinstance(dtype, np.dtype) and dtype.kind in "mM":
if lib.is_np_dtype(dtype, "mM"):
# https://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
if not is_list_like(other):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,7 @@ def _validate_listlike(self, value):
"Cannot set a Categorical with another, "
"without identical categories"
)
# is_dtype_equal implies categories_match_up_to_permutation
# dtype equality implies categories_match_up_to_permutation
value = self._encode_with_my_categories(value)
return value._codes

Expand Down
7 changes: 1 addition & 6 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_regular_range
from pandas.core.arrays.sparse.dtype import SparseDtype
import pandas.core.common as com

from pandas.tseries.frequencies import get_period_alias
Expand Down Expand Up @@ -2035,11 +2034,7 @@ def _sequence_to_dt64ns(
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")

if (
data_dtype == object
or is_string_dtype(data_dtype)
or isinstance(data_dtype, SparseDtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the SparseDtype check no longer necessary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It gets ruled out by the `ensure_arraylike_for_datetimelike` call on line 2023.

):
if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
copy = False
Expand Down
10 changes: 1 addition & 9 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
from pandas.core.dtypes.inference import is_array_like
from pandas.core.dtypes.missing import (
array_equivalent,
is_valid_na_for_dtype,
Expand Down Expand Up @@ -172,20 +171,13 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:

return type(self)(self._data[item], newmask)

@doc(ExtensionArray.fillna)
@doc(ExtensionArray.fillna)
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
value, method = validate_fillna_kwargs(value, method)

mask = self._mask

if is_array_like(value):
if len(value) != len(self):
raise ValueError(
f"Length of 'value' does not match. Got ({len(value)}) "
f" expected {len(self)}"
)
value = value[mask]
value = missing.check_value_size(value, mask, len(self))

if mask.any():
if method is not None:
Expand Down
14 changes: 9 additions & 5 deletions pandas/core/flags.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from __future__ import annotations

from typing import TYPE_CHECKING
import weakref

if TYPE_CHECKING:
from pandas.core.generic import NDFrame


class Flags:
"""
Expand Down Expand Up @@ -44,9 +48,9 @@ class Flags:
<Flags(allows_duplicate_labels=True)>
"""

_keys = {"allows_duplicate_labels"}
_keys: set[str] = {"allows_duplicate_labels"}

def __init__(self, obj, *, allows_duplicate_labels) -> None:
def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None:
self._allows_duplicate_labels = allows_duplicate_labels
self._obj = weakref.ref(obj)

Expand Down Expand Up @@ -95,21 +99,21 @@ def allows_duplicate_labels(self, value: bool) -> None:

self._allows_duplicate_labels = value

def __getitem__(self, key):
def __getitem__(self, key: str):
if key not in self._keys:
raise KeyError(key)

return getattr(self, key)

def __setitem__(self, key, value) -> None:
def __setitem__(self, key: str, value) -> None:
if key not in self._keys:
raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}")
setattr(self, key, value)

def __repr__(self) -> str:
return f"<Flags(allows_duplicate_labels={self.allows_duplicate_labels})>"

def __eq__(self, other):
def __eq__(self, other) -> bool:
if isinstance(other, type(self)):
return self.allows_duplicate_labels == other.allows_duplicate_labels
return False
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6530,7 +6530,7 @@ def sort_values(
axis: Axis = ...,
ascending=...,
inplace: Literal[True],
kind: str = ...,
kind: SortKind = ...,
na_position: str = ...,
ignore_index: bool = ...,
key: ValueKeyFunc = ...,
Expand All @@ -6544,7 +6544,7 @@ def sort_values(
axis: Axis = 0,
ascending: bool | list[bool] | tuple[bool, ...] = True,
inplace: bool = False,
kind: str = "quicksort",
kind: SortKind = "quicksort",
na_position: str = "last",
ignore_index: bool = False,
key: ValueKeyFunc = None,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6206,7 +6206,7 @@ def _check_inplace_setting(self, value) -> bool_t:
"""check whether we allow in-place setting with this type of value"""
if self._is_mixed_type and not self._mgr.is_numeric_mixed_type:
# allow an actual np.nan through
if is_float(value) and np.isnan(value) or value is lib.no_default:
if (is_float(value) and np.isnan(value)) or value is lib.no_default:
return True

raise TypeError(
Expand Down
15 changes: 3 additions & 12 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2868,8 +2868,9 @@ def fillna(self, value=None, downcast=None):
DataFrame.fillna : Fill NaN values of a DataFrame.
Series.fillna : Fill NaN Values of a Series.
"""
if not is_scalar(value):
raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")

value = self._require_scalar(value)
if self.hasnans:
result = self.putmask(self._isnan, value)
if downcast is None:
Expand Down Expand Up @@ -3211,7 +3212,7 @@ def union(self, other, sort=None):

elif not len(other) or self.equals(other):
# NB: whether this (and the `if not len(self)` check below) come before
# or after the is_dtype_equal check above affects the returned dtype
# or after the dtype equality check above affects the returned dtype
result = self._get_reconciled_name_object(other)
if sort is True:
return result.sort_values()
Expand Down Expand Up @@ -5119,16 +5120,6 @@ def _validate_fill_value(self, value):
raise TypeError
return value

@final
def _require_scalar(self, value):
"""
Check that this is a scalar value that we can use for setitem-like
operations without changing dtype.
"""
if not is_scalar(value):
raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
return value

def _is_memory_usage_qualified(self) -> bool:
"""
Return a boolean if we need a qualified .info display.
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,11 +1116,7 @@ def _engine(self):
# calculating the indexer are shifted to 0
sizes = np.ceil(
np.log2(
[
len(level)
+ libindex.multiindex_nulls_shift # type: ignore[attr-defined]
for level in self.levels
]
[len(level) + libindex.multiindex_nulls_shift for level in self.levels]
)
)

Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,7 @@ def should_store(self, value: ArrayLike) -> bool:
-------
bool
"""
# faster equivalent to is_dtype_equal(value.dtype, self.dtype)
try:
return value.dtype == self.dtype
except TypeError:
return False
return value.dtype == self.dtype

# ---------------------------------------------------------------------
# Apply/Reduce and Helpers
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/ops/invalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
from __future__ import annotations

import operator
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
from pandas._typing import npt

def invalid_comparison(left, right, op) -> np.ndarray:

def invalid_comparison(left, right, op) -> npt.NDArray[np.bool_]:
"""
If a comparison has mismatched types and is not necessarily meaningful,
follow python3 conventions by:
Expand Down
16 changes: 6 additions & 10 deletions pandas/core/ops/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,10 @@

import numpy as np

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
is_scalar,
)

from pandas.core import roperator


def _fill_zeros(result, x, y):
def _fill_zeros(result: np.ndarray, x, y):
"""
If this is a reversed op, then flip x,y

Expand All @@ -46,11 +40,11 @@ def _fill_zeros(result, x, y):

Mask the nan's from x.
"""
if is_float_dtype(result.dtype):
if result.dtype.kind == "f":
return result

is_variable_type = hasattr(y, "dtype")
is_scalar_type = is_scalar(y)
is_scalar_type = not isinstance(y, np.ndarray)

if not is_variable_type and not is_scalar_type:
# e.g. test_series_ops_name_retention with mod we get here with list/tuple
Expand All @@ -59,7 +53,7 @@ def _fill_zeros(result, x, y):
if is_scalar_type:
y = np.array(y)

if is_integer_dtype(y.dtype):
if y.dtype.kind in "iu":
ymask = y == 0
if ymask.any():
# GH#7325, mask and nans must be broadcastable
Expand Down Expand Up @@ -143,7 +137,9 @@ def dispatch_fill_zeros(op, left, right, result):
----------
op : function (operator.add, operator.div, ...)
left : object (np.ndarray for non-reversed ops)
We have excluded ExtensionArrays here
right : object (np.ndarray for reversed ops)
We have excluded ExtensionArrays here
result : ndarray

Returns
Expand Down
Loading