Skip to content

Commit 2d5ad57

Browse files
authored
CLN: assorted (#52569)
1 parent c537b36 commit 2d5ad57

28 files changed

+65
-148
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ Performance improvements
248248
- Performance improvement in :meth:`Series.combine_first` (:issue:`51777`)
249249
- Performance improvement in :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` when ``verify_integrity=True`` (:issue:`51873`)
250250
- Performance improvement in :func:`factorize` for object columns not containing strings (:issue:`51921`)
251+
- Performance improvement in :func:`concat` (:issue:`52291`, :issue:`52290`)
251252
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
252253
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
253254
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)

pandas/_config/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@
3030
from pandas._config.display import detect_console_encoding
3131

3232

33-
def using_copy_on_write():
33+
def using_copy_on_write() -> bool:
3434
_mode_options = _global_config["mode"]
3535
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
3636

3737

38-
def using_nullable_dtypes():
38+
def using_nullable_dtypes() -> bool:
3939
_mode_options = _global_config["mode"]
4040
return _mode_options["nullable_dtypes"]

pandas/_libs/tslibs/timestamps.pyi

+6-5
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,13 @@ class Timestamp(datetime):
131131
def astimezone(self, tz: _tzinfo | None) -> Self: ... # type: ignore[override]
132132
def ctime(self) -> str: ...
133133
def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
134-
# Return type "datetime" of "strptime" incompatible with return type "Timestamp"
135-
# in supertype "datetime"
136134
@classmethod
137-
def strptime( # type: ignore[override]
138-
cls, date_string: str, format: str
139-
) -> datetime: ...
135+
def strptime(
136+
# Note: strptime is actually disabled and raises NotImplementedError
137+
cls,
138+
date_string: str,
139+
format: str,
140+
) -> Self: ...
140141
def utcoffset(self) -> timedelta | None: ...
141142
def tzname(self) -> str | None: ...
142143
def dst(self) -> timedelta | None: ...

pandas/_testing/asserters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
547547
_check_isinstance(left, right, PeriodArray)
548548

549549
assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
550-
assert_attr_equal("freq", left, right, obj=obj)
550+
assert_attr_equal("dtype", left, right, obj=obj)
551551

552552

553553
def assert_datetime_array_equal(

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
471471

472472
if (
473473
len(values) > 0
474-
and is_numeric_dtype(values)
474+
and is_numeric_dtype(values.dtype)
475475
and not is_signed_integer_dtype(comps)
476476
):
477477
# GH#46485 Use object to avoid upcast to float64 later

pandas/core/apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def transform(self) -> DataFrame | Series:
236236
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
237237
# Series]"
238238
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
239-
obj.index # type:ignore[arg-type]
239+
obj.index # type: ignore[arg-type]
240240
):
241241
raise ValueError("Function did not transform")
242242

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,7 @@ def _concat_same_type(cls, to_concat) -> Self:
11501150
"""
11511151
chunks = [array for ea in to_concat for array in ea._pa_array.iterchunks()]
11521152
if to_concat[0].dtype == "string":
1153-
# StringDtype has no attrivute pyarrow_dtype
1153+
# StringDtype has no attribute pyarrow_dtype
11541154
pa_dtype = pa.string()
11551155
else:
11561156
pa_dtype = to_concat[0].dtype.pyarrow_dtype

pandas/core/arrays/boolean.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,7 @@
1313
missing as libmissing,
1414
)
1515

16-
from pandas.core.dtypes.common import (
17-
is_list_like,
18-
is_numeric_dtype,
19-
)
16+
from pandas.core.dtypes.common import is_list_like
2017
from pandas.core.dtypes.dtypes import register_extension_dtype
2118
from pandas.core.dtypes.missing import isna
2219

@@ -180,7 +177,7 @@ def coerce_to_array(
180177
if isinstance(values, np.ndarray) and values.dtype == np.bool_:
181178
if copy:
182179
values = values.copy()
183-
elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
180+
elif isinstance(values, np.ndarray) and values.dtype.kind in "iufcb":
184181
mask_values = isna(values)
185182

186183
values_bool = np.zeros(len(values), dtype=bool)

pandas/core/arrays/datetimelike.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@
113113
from pandas.core import (
114114
algorithms,
115115
nanops,
116-
ops,
117116
)
118117
from pandas.core.algorithms import (
119118
checked_add_with_arr,
@@ -903,13 +902,7 @@ def _cmp_method(self, other, op):
903902

904903
dtype = getattr(other, "dtype", None)
905904
if is_object_dtype(dtype):
906-
# We have to use comp_method_OBJECT_ARRAY instead of numpy
907-
# comparison otherwise it would fail to raise when
908-
# comparing tz-aware and tz-naive
909-
result = ops.comp_method_OBJECT_ARRAY(
910-
op, np.asarray(self.astype(object)), other
911-
)
912-
return result
905+
return op(np.asarray(self, dtype=object), other)
913906

914907
if other is NaT:
915908
if op is operator.ne:

pandas/core/arrays/sparse/array.py

-15
Original file line numberDiff line numberDiff line change
@@ -1805,21 +1805,6 @@ def _formatter(self, boxed: bool = False):
18051805
# This will infer the correct formatter from the dtype of the values.
18061806
return None
18071807

1808-
# ------------------------------------------------------------------------
1809-
# GroupBy Methods
1810-
1811-
def _groupby_op(
1812-
self,
1813-
*,
1814-
how: str,
1815-
has_dropped_na: bool,
1816-
min_count: int,
1817-
ngroups: int,
1818-
ids: npt.NDArray[np.intp],
1819-
**kwargs,
1820-
):
1821-
raise NotImplementedError(f"{self.dtype} dtype not supported")
1822-
18231808

18241809
def _make_sparse(
18251810
arr: np.ndarray,

pandas/core/dtypes/missing.py

-19
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,8 @@
2525
DT64NS_DTYPE,
2626
TD64NS_DTYPE,
2727
ensure_object,
28-
is_bool_dtype,
2928
is_dtype_equal,
3029
is_extension_array_dtype,
31-
is_integer_dtype,
3230
is_object_dtype,
3331
is_scalar,
3432
is_string_or_object_np_dtype,
@@ -431,23 +429,6 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
431429
notnull = notna
432430

433431

434-
def isna_compat(arr, fill_value=np.nan) -> bool:
435-
"""
436-
Parameters
437-
----------
438-
arr: a numpy array
439-
fill_value: fill value, default to np.nan
440-
441-
Returns
442-
-------
443-
True if we can fill using this fill_value
444-
"""
445-
if isna(fill_value):
446-
dtype = arr.dtype
447-
return not (is_bool_dtype(dtype) or is_integer_dtype(dtype))
448-
return True
449-
450-
451432
def array_equivalent(
452433
left,
453434
right,

pandas/core/frame.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,8 @@
278278
axis : int or str, optional
279279
Axis to target. Can be either the axis name ('index', 'columns')
280280
or number (0, 1).""",
281-
"replace_iloc": """
282-
This differs from updating with ``.loc`` or ``.iloc``, which require
283-
you to specify a location to update with some value.""",
284281
}
285282

286-
_numeric_only_doc = """numeric_only : bool, default False
287-
Include only float, int, boolean data.
288-
"""
289-
290283
_merge_doc = """
291284
Merge DataFrame or named Series objects with a database-style join.
292285
@@ -5736,7 +5729,7 @@ def set_index(
57365729

57375730
# error: Argument 1 to "append" of "list" has incompatible type
57385731
# "Union[Index, Series]"; expected "Index"
5739-
arrays.append(col) # type:ignore[arg-type]
5732+
arrays.append(col) # type: ignore[arg-type]
57405733
names.append(col.name)
57415734
elif isinstance(col, (list, np.ndarray)):
57425735
# error: Argument 1 to "append" of "list" has incompatible type
@@ -7791,10 +7784,7 @@ def _flex_arith_method(
77917784
# through the DataFrame path
77927785
raise NotImplementedError(f"fill_value {fill_value} not supported.")
77937786

7794-
other = ops.maybe_prepare_scalar_for_op(
7795-
other,
7796-
self.shape,
7797-
)
7787+
other = ops.maybe_prepare_scalar_for_op(other, self.shape)
77987788
self, other = self._align_for_op(other, axis, flex=True, level=level)
77997789

78007790
with np.errstate(all="ignore"):

pandas/core/generic.py

+3-18
Original file line numberDiff line numberDiff line change
@@ -212,16 +212,12 @@
212212
"axes": "keywords for axes",
213213
"klass": "Series/DataFrame",
214214
"axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame", # noqa:E501
215-
"args_transpose": "axes to permute (int or label for object)",
216215
"inplace": """
217216
inplace : bool, default False
218217
If True, performs operation inplace and returns None.""",
219218
"optional_by": """
220219
by : str or list of str
221220
Name or list of names to sort by""",
222-
"replace_iloc": """
223-
This differs from updating with ``.loc`` or ``.iloc``, which require
224-
you to specify a location to update with some value.""",
225221
}
226222

227223

@@ -264,22 +260,11 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
264260
# ----------------------------------------------------------------------
265261
# Constructors
266262

267-
def __init__(
268-
self,
269-
data: Manager,
270-
copy: bool_t = False,
271-
attrs: Mapping[Hashable, Any] | None = None,
272-
) -> None:
273-
# copy kwarg is retained for mypy compat, is not used
274-
263+
def __init__(self, data: Manager) -> None:
275264
object.__setattr__(self, "_is_copy", None)
276265
object.__setattr__(self, "_mgr", data)
277266
object.__setattr__(self, "_item_cache", {})
278-
if attrs is None:
279-
attrs = {}
280-
else:
281-
attrs = dict(attrs)
282-
object.__setattr__(self, "_attrs", attrs)
267+
object.__setattr__(self, "_attrs", {})
283268
object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))
284269

285270
@final
@@ -313,6 +298,7 @@ def _init_mgr(
313298
mgr = mgr.astype(dtype=dtype)
314299
return mgr
315300

301+
@final
316302
def _as_manager(self, typ: str, copy: bool_t = True) -> Self:
317303
"""
318304
Private helper function to create a DataFrame with specific manager.
@@ -7314,7 +7300,6 @@ def replace(
73147300
_shared_docs["replace"],
73157301
klass=_shared_doc_kwargs["klass"],
73167302
inplace=_shared_doc_kwargs["inplace"],
7317-
replace_iloc=_shared_doc_kwargs["replace_iloc"],
73187303
)
73197304
def replace(
73207305
self,

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ def _outer_indexer(
431431

432432
@cache_readonly
433433
def _can_hold_strings(self) -> bool:
434-
return not is_numeric_dtype(self)
434+
return not is_numeric_dtype(self.dtype)
435435

436436
_engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
437437
np.dtype(np.int8): libindex.Int8Engine,
@@ -3307,6 +3307,8 @@ def _wrap_setop_result(self, other: Index, result) -> Index:
33073307

33083308
@final
33093309
def intersection(self, other, sort: bool = False):
3310+
# default sort keyword is different here from other setops intentionally
3311+
# done in GH#25063
33103312
"""
33113313
Form the intersection of two Index objects.
33123314

pandas/core/indexes/category.py

-3
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
contains,
3030
)
3131
from pandas.core.construction import extract_array
32-
import pandas.core.indexes.base as ibase
3332
from pandas.core.indexes.base import (
3433
Index,
3534
maybe_extract_name,
@@ -47,8 +46,6 @@
4746
DtypeObj,
4847
npt,
4948
)
50-
_index_doc_kwargs: dict[str, str] = dict(ibase._index_doc_kwargs)
51-
_index_doc_kwargs.update({"target_klass": "CategoricalIndex"})
5249

5350

5451
@inherit_names(

pandas/core/internals/concat.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@
6464
)
6565

6666
from pandas import Index
67-
from pandas.core.internals.blocks import Block
67+
from pandas.core.internals.blocks import (
68+
Block,
69+
BlockPlacement,
70+
)
6871

6972

7073
def _concatenate_array_managers(
@@ -317,7 +320,9 @@ def _maybe_reindex_columns_na_proxy(
317320
return new_mgrs_indexers
318321

319322

320-
def _get_mgr_concatenation_plan(mgr: BlockManager):
323+
def _get_mgr_concatenation_plan(
324+
mgr: BlockManager,
325+
) -> list[tuple[BlockPlacement, JoinUnit]]:
321326
"""
322327
Construct concatenation plan for given block manager.
323328

pandas/core/nanops.py

+6-12
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
needs_i8_conversion,
4545
pandas_dtype,
4646
)
47-
from pandas.core.dtypes.dtypes import PeriodDtype
4847
from pandas.core.dtypes.missing import (
4948
isna,
5049
na_value_for_dtype,
@@ -669,7 +668,6 @@ def _mask_datetimelike_result(
669668
return result
670669

671670

672-
@disallow(PeriodDtype)
673671
@bottleneck_switch()
674672
@_datetimelike_compat
675673
def nanmean(
@@ -808,38 +806,34 @@ def get_median(x, _mask=None):
808806
# empty set so return nans of shape "everything but the passed axis"
809807
# since "axis" is where the reduction would occur if we had a nonempty
810808
# array
811-
res = get_empty_reduction_result(values.shape, axis, np.float_, np.nan)
809+
res = _get_empty_reduction_result(values.shape, axis)
812810

813811
else:
814812
# otherwise return a scalar value
815813
res = get_median(values, mask) if notempty else np.nan
816814
return _wrap_results(res, dtype)
817815

818816

819-
def get_empty_reduction_result(
820-
shape: tuple[int, ...],
817+
def _get_empty_reduction_result(
818+
shape: Shape,
821819
axis: AxisInt,
822-
dtype: np.dtype | type[np.floating],
823-
fill_value: Any,
824820
) -> np.ndarray:
825821
"""
826822
The result from a reduction on an empty ndarray.
827823
828824
Parameters
829825
----------
830-
shape : Tuple[int]
826+
shape : Tuple[int, ...]
831827
axis : int
832-
dtype : np.dtype
833-
fill_value : Any
834828
835829
Returns
836830
-------
837831
np.ndarray
838832
"""
839833
shp = np.array(shape)
840834
dims = np.arange(len(shape))
841-
ret = np.empty(shp[dims != axis], dtype=dtype)
842-
ret.fill(fill_value)
835+
ret = np.empty(shp[dims != axis], dtype=np.float64)
836+
ret.fill(np.nan)
843837
return ret
844838

845839

0 commit comments

Comments
 (0)