CLN: assorted #52569

Merged (2 commits) on Apr 10, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
@@ -248,6 +248,7 @@ Performance improvements
- Performance improvement in :meth:`Series.combine_first` (:issue:`51777`)
- Performance improvement in :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` when ``verify_integrity=True`` (:issue:`51873`)
- Performance improvement in :func:`factorize` for object columns not containing strings (:issue:`51921`)
- Performance improvement in :func:`concat` (:issue:`52291`, :issue:`52290`)
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
4 changes: 2 additions & 2 deletions pandas/_config/__init__.py
@@ -30,11 +30,11 @@
from pandas._config.display import detect_console_encoding


def using_copy_on_write():
def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"


def using_nullable_dtypes():
def using_nullable_dtypes() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["nullable_dtypes"]
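The annotated helpers above only read `mode.*` options. A minimal sketch (not part of the diff, assuming pandas 2.x where `mode.copy_on_write` exists) of how the same flag is toggled through the public options API:

```python
# Minimal sketch: the typed helpers above read this same option internally.
import pandas as pd

pd.set_option("mode.copy_on_write", True)
print(pd.get_option("mode.copy_on_write"))  # True
```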
11 changes: 6 additions & 5 deletions pandas/_libs/tslibs/timestamps.pyi
@@ -131,12 +131,13 @@ class Timestamp(datetime):
def astimezone(self, tz: _tzinfo | None) -> Self: ... # type: ignore[override]
def ctime(self) -> str: ...
def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
# Return type "datetime" of "strptime" incompatible with return type "Timestamp"
# in supertype "datetime"
@classmethod
def strptime( # type: ignore[override]
cls, date_string: str, format: str
) -> datetime: ...
def strptime(
# Note: strptime is actually disabled and raises NotImplementedError
cls,
date_string: str,
format: str,
) -> Self: ...
def utcoffset(self) -> timedelta | None: ...
def tzname(self) -> str | None: ...
def dst(self) -> timedelta | None: ...
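The `Self` return type in the stub only matters for type checkers; at runtime `Timestamp.strptime` is disabled, as the inline note says. A small illustration (not part of the diff):

```python
# Timestamp.strptime raises NotImplementedError at runtime; pd.to_datetime
# is the supported way to parse date strings into Timestamps.
import pandas as pd

try:
    pd.Timestamp.strptime("2023-04-10", "%Y-%m-%d")
except NotImplementedError:
    print("not implemented; use pd.to_datetime instead")

print(pd.to_datetime("2023-04-10", format="%Y-%m-%d"))  # Timestamp('2023-04-10 00:00:00')
```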
2 changes: 1 addition & 1 deletion pandas/_testing/asserters.py
@@ -547,7 +547,7 @@ def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
_check_isinstance(left, right, PeriodArray)

assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("dtype", left, right, obj=obj)


def assert_datetime_array_equal(
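Checking `dtype` instead of `freq` covers the old check for `PeriodArray`, because the frequency is carried by the `PeriodDtype` itself; a quick check (not part of the diff):

```python
# The freq of a PeriodArray is part of its dtype, so an equal-dtype assertion
# also covers the old equal-freq assertion.
import pandas as pd

arr = pd.array(pd.period_range("2023-01", periods=3, freq="M"))
print(arr.dtype)       # period[M]
print(arr.dtype.freq)  # <MonthEnd>
```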
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
@@ -471,7 +471,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:

if (
len(values) > 0
and is_numeric_dtype(values)
and is_numeric_dtype(values.dtype)
and not is_signed_integer_dtype(comps)
):
# GH#46485 Use object to avoid upcast to float64 later
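Passing `values.dtype` rather than the array itself makes the intent explicit and avoids re-inferring the dtype from the data; the public helper accepts both forms (illustrative sketch, not part of the diff):

```python
# is_numeric_dtype accepts either an array-like or a dtype object; the diff
# switches the internal call to the dtype form.
import numpy as np
from pandas.api.types import is_numeric_dtype

values = np.array([1, 2, 3], dtype=np.int64)
print(is_numeric_dtype(values))        # True
print(is_numeric_dtype(values.dtype))  # True
```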
2 changes: 1 addition & 1 deletion pandas/core/apply.py
@@ -236,7 +236,7 @@ def transform(self) -> DataFrame | Series:
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
# Series]"
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
obj.index # type:ignore[arg-type]
obj.index # type: ignore[arg-type]
):
raise ValueError("Function did not transform")

2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
@@ -1150,7 +1150,7 @@ def _concat_same_type(cls, to_concat) -> Self:
"""
chunks = [array for ea in to_concat for array in ea._pa_array.iterchunks()]
if to_concat[0].dtype == "string":
# StringDtype has no attrivute pyarrow_dtype
# StringDtype has no attribute pyarrow_dtype
pa_dtype = pa.string()
else:
pa_dtype = to_concat[0].dtype.pyarrow_dtype
7 changes: 2 additions & 5 deletions pandas/core/arrays/boolean.py
@@ -13,10 +13,7 @@
missing as libmissing,
)

from pandas.core.dtypes.common import (
is_list_like,
is_numeric_dtype,
)
from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.missing import isna

@@ -180,7 +177,7 @@ def coerce_to_array(
if isinstance(values, np.ndarray) and values.dtype == np.bool_:
if copy:
values = values.copy()
elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
elif isinstance(values, np.ndarray) and values.dtype.kind in "iufcb":
mask_values = isna(values)

values_bool = np.zeros(len(values), dtype=bool)
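The `values.dtype.kind in "iufcb"` test replaces `is_numeric_dtype` with a direct check on NumPy's one-letter kind codes (signed int, unsigned int, float, complex, bool); for reference (not part of the diff):

```python
# NumPy kind codes covered by "iufcb": i=signed int, u=unsigned int,
# f=float, c=complex, b=bool. String dtypes ("U") fall outside the set.
import numpy as np

for dt in (np.int64, np.uint8, np.float32, np.complex128, np.bool_, np.str_):
    kind = np.dtype(dt).kind
    print(dt.__name__, kind, kind in "iufcb")
```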
9 changes: 1 addition & 8 deletions pandas/core/arrays/datetimelike.py
@@ -113,7 +113,6 @@
from pandas.core import (
algorithms,
nanops,
ops,
)
from pandas.core.algorithms import (
checked_add_with_arr,
@@ -903,13 +902,7 @@ def _cmp_method(self, other, op):

dtype = getattr(other, "dtype", None)
if is_object_dtype(dtype):
# We have to use comp_method_OBJECT_ARRAY instead of numpy
# comparison otherwise it would fail to raise when
# comparing tz-aware and tz-naive
result = ops.comp_method_OBJECT_ARRAY(
op, np.asarray(self.astype(object)), other
)
return result
return op(np.asarray(self, dtype=object), other)

if other is NaT:
if op is operator.ne:
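Context for the removed comment: ordering comparisons between tz-aware and tz-naive datetimes are expected to raise, whichever object-dtype path performs them. A scalar-level illustration (not part of the diff):

```python
# Ordering a tz-aware against a tz-naive Timestamp raises TypeError, while
# equality comparisons simply return False instead of raising.
import pandas as pd

aware = pd.Timestamp("2023-04-10", tz="UTC")
naive = pd.Timestamp("2023-04-10")

print(aware == naive)  # False
try:
    aware < naive
except TypeError:
    print("TypeError: cannot order tz-aware and tz-naive timestamps")
```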
15 changes: 0 additions & 15 deletions pandas/core/arrays/sparse/array.py
@@ -1805,21 +1805,6 @@ def _formatter(self, boxed: bool = False):
# This will infer the correct formatter from the dtype of the values.
return None

# ------------------------------------------------------------------------
# GroupBy Methods

def _groupby_op(
self,
*,
how: str,
has_dropped_na: bool,
min_count: int,
ngroups: int,
ids: npt.NDArray[np.intp],
**kwargs,
):
raise NotImplementedError(f"{self.dtype} dtype not supported")


def _make_sparse(
arr: np.ndarray,
19 changes: 0 additions & 19 deletions pandas/core/dtypes/missing.py
@@ -25,10 +25,8 @@
DT64NS_DTYPE,
TD64NS_DTYPE,
ensure_object,
is_bool_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_integer_dtype,
is_object_dtype,
is_scalar,
is_string_or_object_np_dtype,
@@ -431,23 +429,6 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
notnull = notna


def isna_compat(arr, fill_value=np.nan) -> bool:
"""
Parameters
----------
arr: a numpy array
fill_value: fill value, default to np.nan

Returns
-------
True if we can fill using this fill_value
"""
if isna(fill_value):
dtype = arr.dtype
return not (is_bool_dtype(dtype) or is_integer_dtype(dtype))
return True


def array_equivalent(
left,
right,
14 changes: 2 additions & 12 deletions pandas/core/frame.py
@@ -278,15 +278,8 @@
axis : int or str, optional
Axis to target. Can be either the axis name ('index', 'columns')
or number (0, 1).""",
"replace_iloc": """
This differs from updating with ``.loc`` or ``.iloc``, which require
you to specify a location to update with some value.""",
}

_numeric_only_doc = """numeric_only : bool, default False
Include only float, int, boolean data.
"""

_merge_doc = """
Merge DataFrame or named Series objects with a database-style join.

@@ -5736,7 +5729,7 @@ def set_index(

# error: Argument 1 to "append" of "list" has incompatible type
# "Union[Index, Series]"; expected "Index"
arrays.append(col) # type:ignore[arg-type]
arrays.append(col) # type: ignore[arg-type]
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
# error: Argument 1 to "append" of "list" has incompatible type
@@ -7791,10 +7784,7 @@ def _flex_arith_method(
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(
other,
self.shape,
)
other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

with np.errstate(all="ignore"):
21 changes: 3 additions & 18 deletions pandas/core/generic.py
@@ -212,16 +212,12 @@
"axes": "keywords for axes",
"klass": "Series/DataFrame",
"axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame", # noqa:E501
"args_transpose": "axes to permute (int or label for object)",
"inplace": """
inplace : bool, default False
If True, performs operation inplace and returns None.""",
"optional_by": """
by : str or list of str
Name or list of names to sort by""",
"replace_iloc": """
This differs from updating with ``.loc`` or ``.iloc``, which require
you to specify a location to update with some value.""",
}


@@ -264,22 +260,11 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
# ----------------------------------------------------------------------
# Constructors

def __init__(
self,
data: Manager,
copy: bool_t = False,
attrs: Mapping[Hashable, Any] | None = None,
) -> None:
# copy kwarg is retained for mypy compat, is not used

def __init__(self, data: Manager) -> None:
object.__setattr__(self, "_is_copy", None)
object.__setattr__(self, "_mgr", data)
object.__setattr__(self, "_item_cache", {})
if attrs is None:
attrs = {}
else:
attrs = dict(attrs)
object.__setattr__(self, "_attrs", attrs)
object.__setattr__(self, "_attrs", {})
object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))

@final
@@ -313,6 +298,7 @@ def _init_mgr(
mgr = mgr.astype(dtype=dtype)
return mgr

@final
def _as_manager(self, typ: str, copy: bool_t = True) -> Self:
"""
Private helper function to create a DataFrame with specific manager.
@@ -7314,7 +7300,6 @@ def replace(
_shared_docs["replace"],
klass=_shared_doc_kwargs["klass"],
inplace=_shared_doc_kwargs["inplace"],
replace_iloc=_shared_doc_kwargs["replace_iloc"],
)
def replace(
self,
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
@@ -431,7 +431,7 @@ def _outer_indexer(

@cache_readonly
def _can_hold_strings(self) -> bool:
return not is_numeric_dtype(self)
return not is_numeric_dtype(self.dtype)

_engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
np.dtype(np.int8): libindex.Int8Engine,
@@ -3307,6 +3307,8 @@ def _wrap_setop_result(self, other: Index, result) -> Index:

@final
def intersection(self, other, sort: bool = False):
# default sort keyword is different here from other setops intentionally
# done in GH#25063
"""
Form the intersection of two Index objects.

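The added comment documents why `intersection` defaults to `sort=False` while the other set ops default to `sort=None` (GH#25063); the observable difference in brief (not part of the diff):

```python
import pandas as pd

left = pd.Index([3, 1, 2])
right = pd.Index([2, 3, 4])

# union sorts by default (sort=None) ...
print(left.union(right))         # Index([1, 2, 3, 4], dtype='int64')
# ... while intersection defaults to sort=False, so no sorting is applied
# to its result.
print(left.intersection(right))
```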
3 changes: 0 additions & 3 deletions pandas/core/indexes/category.py
@@ -29,7 +29,6 @@
contains,
)
from pandas.core.construction import extract_array
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
@@ -47,8 +46,6 @@
DtypeObj,
npt,
)
_index_doc_kwargs: dict[str, str] = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "CategoricalIndex"})


@inherit_names(
9 changes: 7 additions & 2 deletions pandas/core/internals/concat.py
@@ -64,7 +64,10 @@
)

from pandas import Index
from pandas.core.internals.blocks import Block
from pandas.core.internals.blocks import (
Block,
BlockPlacement,
)


def _concatenate_array_managers(
@@ -317,7 +320,9 @@ def _maybe_reindex_columns_na_proxy(
return new_mgrs_indexers


def _get_mgr_concatenation_plan(mgr: BlockManager):
def _get_mgr_concatenation_plan(
mgr: BlockManager,
) -> list[tuple[BlockPlacement, JoinUnit]]:
"""
Construct concatenation plan for given block manager.

18 changes: 6 additions & 12 deletions pandas/core/nanops.py
@@ -44,7 +44,6 @@
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
@@ -669,7 +668,6 @@ def _mask_datetimelike_result(
return result


@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
@@ -808,38 +806,34 @@ def get_median(x, _mask=None):
# empty set so return nans of shape "everything but the passed axis"
# since "axis" is where the reduction would occur if we had a nonempty
# array
res = get_empty_reduction_result(values.shape, axis, np.float_, np.nan)
res = _get_empty_reduction_result(values.shape, axis)

else:
# otherwise return a scalar value
res = get_median(values, mask) if notempty else np.nan
return _wrap_results(res, dtype)


def get_empty_reduction_result(
shape: tuple[int, ...],
def _get_empty_reduction_result(
shape: Shape,
axis: AxisInt,
dtype: np.dtype | type[np.floating],
fill_value: Any,
) -> np.ndarray:
"""
The result from a reduction on an empty ndarray.

Parameters
----------
shape : Tuple[int]
shape : Tuple[int, ...]
axis : int
dtype : np.dtype
fill_value : Any

Returns
-------
np.ndarray
"""
shp = np.array(shape)
dims = np.arange(len(shape))
ret = np.empty(shp[dims != axis], dtype=dtype)
ret.fill(fill_value)
ret = np.empty(shp[dims != axis], dtype=np.float64)
ret.fill(np.nan)
return ret


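The simplified `_get_empty_reduction_result` drops the `dtype`/`fill_value` parameters because the call shown above always passed `np.float_`/`np.nan`; a standalone sketch of the shape logic (not part of the diff):

```python
# For an empty reduction, the result keeps every dimension except the reduced
# axis and is filled with NaN (float64), matching the remaining caller.
import numpy as np

shape, axis = (0, 4), 0
shp = np.array(shape)
dims = np.arange(len(shape))
res = np.full(shp[dims != axis], np.nan)
print(res.shape, res.dtype)  # (4,) float64
```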