Skip to content

Commit 6f9caeb

Browse files
committed
Merge remote-tracking branch 'upstream/master' into pd-todatetime-unit_s-float-vs-int
2 parents 8837ff4 + 6929e26 commit 6f9caeb

File tree

12 files changed

+201
-160
lines changed

12 files changed

+201
-160
lines changed

.github/workflows/stale-pr.yml

+21
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
name: "Stale PRs"
2+
on:
3+
schedule:
4+
# * is a special character in YAML so you have to quote this string
5+
- cron: "0 */6 * * *"
6+
7+
jobs:
8+
stale:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- uses: actions/stale@v3
12+
with:
13+
repo-token: ${{ secrets.GITHUB_TOKEN }}
14+
stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity."
15+
skip-stale-pr-message: false
16+
stale-pr-label: "Stale"
17+
exempt-pr-labels: "Needs Review,Blocked"
18+
days-before-stale: 30
19+
days-before-close: -1
20+
remove-stale-when-updated: true
21+
debug-only: true

pandas/core/arrays/_mixins.py

+12
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from pandas.core.algorithms import take, unique
1010
from pandas.core.array_algos.transforms import shift
1111
from pandas.core.arrays.base import ExtensionArray
12+
from pandas.core.indexers import check_array_indexer
1213

1314
_T = TypeVar("_T", bound="NDArrayBackedExtensionArray")
1415

@@ -156,3 +157,14 @@ def _validate_shift_value(self, fill_value):
156157
# TODO: after deprecation in datetimelikearraymixin is enforced,
157158
# we can remove this and just validate_fill_value directly
158159
return self._validate_fill_value(fill_value)
160+
161+
def __setitem__(self, key, value):
162+
key = self._validate_setitem_key(key)
163+
value = self._validate_setitem_value(value)
164+
self._ndarray[key] = value
165+
166+
def _validate_setitem_key(self, key):
167+
return check_array_indexer(self, key)
168+
169+
def _validate_setitem_value(self, value):
170+
return value

pandas/core/arrays/categorical.py

+17-26
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def func(self, other):
9393

9494
if is_scalar(other):
9595
if other in self.categories:
96-
i = self.categories.get_loc(other)
96+
i = self._unbox_scalar(other)
9797
ret = op(self._codes, i)
9898

9999
if opname not in {"__eq__", "__ge__", "__gt__"}:
@@ -1184,8 +1184,7 @@ def _validate_searchsorted_value(self, value):
11841184
# searchsorted is very performance sensitive. By converting codes
11851185
# to same dtype as self.codes, we get much faster performance.
11861186
if is_scalar(value):
1187-
codes = self.categories.get_loc(value)
1188-
codes = self.codes.dtype.type(codes)
1187+
codes = self._unbox_scalar(value)
11891188
else:
11901189
locs = [self.categories.get_loc(x) for x in value]
11911190
codes = np.array(locs, dtype=self.codes.dtype)
@@ -1212,7 +1211,7 @@ def _validate_fill_value(self, fill_value):
12121211
if isna(fill_value):
12131212
fill_value = -1
12141213
elif fill_value in self.categories:
1215-
fill_value = self.categories.get_loc(fill_value)
1214+
fill_value = self._unbox_scalar(fill_value)
12161215
else:
12171216
raise ValueError(
12181217
f"'fill_value={fill_value}' is not present "
@@ -1680,7 +1679,7 @@ def fillna(self, value=None, method=None, limit=None):
16801679
if isna(value):
16811680
codes[mask] = -1
16821681
else:
1683-
codes[mask] = self.categories.get_loc(value)
1682+
codes[mask] = self._unbox_scalar(value)
16841683

16851684
else:
16861685
raise TypeError(
@@ -1734,6 +1733,17 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
17341733

17351734
return codes
17361735

1736+
def _unbox_scalar(self, key) -> int:
1737+
# searchsorted is very performance sensitive. By converting codes
1738+
# to same dtype as self.codes, we get much faster performance.
1739+
code = self.categories.get_loc(key)
1740+
code = self._codes.dtype.type(code)
1741+
return code
1742+
1743+
def _unbox_listlike(self, value):
1744+
unboxed = self.categories.get_indexer(value)
1745+
return unboxed.astype(self._ndarray.dtype, copy=False)
1746+
17371747
# ------------------------------------------------------------------
17381748

17391749
def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
@@ -1884,20 +1894,6 @@ def __getitem__(self, key):
18841894
return result
18851895
return self._from_backing_data(result)
18861896

1887-
def __setitem__(self, key, value):
1888-
"""
1889-
Item assignment.
1890-
1891-
Raises
1892-
------
1893-
ValueError
1894-
If (one or more) Value is not in categories or if a assigned
1895-
`Categorical` does not have the same categories
1896-
"""
1897-
key = self._validate_setitem_key(key)
1898-
value = self._validate_setitem_value(value)
1899-
self._ndarray[key] = value
1900-
19011897
def _validate_setitem_value(self, value):
19021898
value = extract_array(value, extract_numpy=True)
19031899

@@ -1925,11 +1921,7 @@ def _validate_setitem_value(self, value):
19251921
"category, set the categories first"
19261922
)
19271923

1928-
lindexer = self.categories.get_indexer(rvalue)
1929-
if isinstance(lindexer, np.ndarray) and lindexer.dtype.kind == "i":
1930-
lindexer = lindexer.astype(self._ndarray.dtype)
1931-
1932-
return lindexer
1924+
return self._unbox_listlike(rvalue)
19331925

19341926
def _validate_setitem_key(self, key):
19351927
if lib.is_integer(key):
@@ -2155,8 +2147,7 @@ def unique(self):
21552147
return cat.set_categories(cat.categories.take(take_codes))
21562148

21572149
def _values_for_factorize(self):
2158-
codes = self.codes.astype("int64")
2159-
return codes, -1
2150+
return self._ndarray, -1
21602151

21612152
@classmethod
21622153
def _from_factorized(cls, uniques, original):

pandas/core/arrays/datetimelike.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -609,9 +609,7 @@ def __setitem__(
609609
if no_op:
610610
return
611611

612-
value = self._validate_setitem_value(value)
613-
key = check_array_indexer(self, key)
614-
self._ndarray[key] = value
612+
super().__setitem__(key, value)
615613
self._maybe_clear_freq()
616614

617615
def _maybe_clear_freq(self):
@@ -697,7 +695,7 @@ def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
697695
return new_obj
698696

699697
def _values_for_factorize(self):
700-
return self.asi8, iNaT
698+
return self._ndarray, iNaT
701699

702700
@classmethod
703701
def _from_factorized(cls, values, original):

pandas/core/arrays/numpy_.py

-8
Original file line number | Diff line number | Diff line change
@@ -259,21 +259,13 @@ def __getitem__(self, item):
259259
result = type(self)(result)
260260
return result
261261

262-
def __setitem__(self, key, value) -> None:
263-
key = self._validate_setitem_key(key)
264-
value = self._validate_setitem_value(value)
265-
self._ndarray[key] = value
266-
267262
def _validate_setitem_value(self, value):
268263
value = extract_array(value, extract_numpy=True)
269264

270265
if not lib.is_scalar(value):
271266
value = np.asarray(value, dtype=self._ndarray.dtype)
272267
return value
273268

274-
def _validate_setitem_key(self, key):
275-
return check_array_indexer(self, key)
276-
277269
def isna(self) -> np.ndarray:
278270
return isna(self._ndarray)
279271

pandas/core/computation/expr.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
import numpy as np
1212

13+
from pandas.compat import PY39
14+
1315
import pandas.core.common as com
1416
from pandas.core.computation.ops import (
1517
ARITH_OPS_SYMS,
@@ -186,7 +188,6 @@ def _filter_nodes(superclass, all_nodes=_all_nodes):
186188
_stmt_nodes = _filter_nodes(ast.stmt)
187189
_expr_nodes = _filter_nodes(ast.expr)
188190
_expr_context_nodes = _filter_nodes(ast.expr_context)
189-
_slice_nodes = _filter_nodes(ast.slice)
190191
_boolop_nodes = _filter_nodes(ast.boolop)
191192
_operator_nodes = _filter_nodes(ast.operator)
192193
_unary_op_nodes = _filter_nodes(ast.unaryop)
@@ -197,6 +198,9 @@ def _filter_nodes(superclass, all_nodes=_all_nodes):
197198
_keyword_nodes = _filter_nodes(ast.keyword)
198199
_alias_nodes = _filter_nodes(ast.alias)
199200

201+
if not PY39:
202+
_slice_nodes = _filter_nodes(ast.slice)
203+
200204

201205
# nodes that we don't support directly but are needed for parsing
202206
_hacked_nodes = frozenset(["Assign", "Module", "Expr"])

pandas/core/frame.py

+11-57
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@
143143
)
144144
from pandas.core.reshape.melt import melt
145145
from pandas.core.series import Series
146-
from pandas.core.sorting import ensure_key_mapped
147146

148147
from pandas.io.common import get_filepath_or_buffer
149148
from pandas.io.formats import console, format as fmt
@@ -5448,62 +5447,17 @@ def sort_index(
54485447
C 3
54495448
d 4
54505449
"""
5451-
# TODO: this can be combined with Series.sort_index impl as
5452-
# almost identical
5453-
5454-
inplace = validate_bool_kwarg(inplace, "inplace")
5455-
5456-
axis = self._get_axis_number(axis)
5457-
labels = self._get_axis(axis)
5458-
labels = ensure_key_mapped(labels, key, levels=level)
5459-
5460-
# make sure that the axis is lexsorted to start
5461-
# if not we need to reconstruct to get the correct indexer
5462-
labels = labels._sort_levels_monotonic()
5463-
if level is not None:
5464-
new_axis, indexer = labels.sortlevel(
5465-
level, ascending=ascending, sort_remaining=sort_remaining
5466-
)
5467-
5468-
elif isinstance(labels, MultiIndex):
5469-
from pandas.core.sorting import lexsort_indexer
5470-
5471-
indexer = lexsort_indexer(
5472-
labels._get_codes_for_sorting(),
5473-
orders=ascending,
5474-
na_position=na_position,
5475-
)
5476-
else:
5477-
from pandas.core.sorting import nargsort
5478-
5479-
# Check monotonic-ness before sort an index
5480-
# GH11080
5481-
if (ascending and labels.is_monotonic_increasing) or (
5482-
not ascending and labels.is_monotonic_decreasing
5483-
):
5484-
if inplace:
5485-
return
5486-
else:
5487-
return self.copy()
5488-
5489-
indexer = nargsort(
5490-
labels, kind=kind, ascending=ascending, na_position=na_position
5491-
)
5492-
5493-
baxis = self._get_block_manager_axis(axis)
5494-
new_data = self._mgr.take(indexer, axis=baxis, verify=False)
5495-
5496-
# reconstruct axis if needed
5497-
new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()
5498-
5499-
if ignore_index:
5500-
new_data.axes[1] = ibase.default_index(len(indexer))
5501-
5502-
result = self._constructor(new_data)
5503-
if inplace:
5504-
return self._update_inplace(result)
5505-
else:
5506-
return result.__finalize__(self, method="sort_index")
5450+
return super().sort_index(
5451+
axis,
5452+
level,
5453+
ascending,
5454+
inplace,
5455+
kind,
5456+
na_position,
5457+
sort_remaining,
5458+
ignore_index,
5459+
key,
5460+
)
55075461

55085462
def value_counts(
55095463
self,

pandas/core/generic.py

+47
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
CompressionOptions,
4141
FilePathOrBuffer,
4242
FrameOrSeries,
43+
IndexKeyFunc,
4344
IndexLabel,
4445
JSONSerializable,
4546
Label,
@@ -92,6 +93,7 @@
9293
import pandas.core.common as com
9394
from pandas.core.construction import create_series_with_explicit_dtype
9495
from pandas.core.flags import Flags
96+
from pandas.core.indexes import base as ibase
9597
from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index
9698
from pandas.core.indexes.datetimes import DatetimeIndex
9799
from pandas.core.indexes.period import Period, PeriodIndex
@@ -100,6 +102,7 @@
100102
from pandas.core.missing import find_valid_index
101103
from pandas.core.ops import align_method_FRAME
102104
from pandas.core.shared_docs import _shared_docs
105+
from pandas.core.sorting import get_indexer_indexer
103106
from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window
104107

105108
from pandas.io.formats import format as fmt
@@ -4409,6 +4412,50 @@ def sort_values(
44094412
"""
44104413
raise AbstractMethodError(self)
44114414

4415+
def sort_index(
4416+
self,
4417+
axis=0,
4418+
level=None,
4419+
ascending: bool_t = True,
4420+
inplace: bool_t = False,
4421+
kind: str = "quicksort",
4422+
na_position: str = "last",
4423+
sort_remaining: bool_t = True,
4424+
ignore_index: bool_t = False,
4425+
key: IndexKeyFunc = None,
4426+
):
4427+
4428+
inplace = validate_bool_kwarg(inplace, "inplace")
4429+
axis = self._get_axis_number(axis)
4430+
target = self._get_axis(axis)
4431+
4432+
indexer = get_indexer_indexer(
4433+
target, level, ascending, kind, na_position, sort_remaining, key
4434+
)
4435+
4436+
if indexer is None:
4437+
if inplace:
4438+
return
4439+
else:
4440+
return self.copy()
4441+
4442+
baxis = self._get_block_manager_axis(axis)
4443+
new_data = self._mgr.take(indexer, axis=baxis, verify=False)
4444+
4445+
# reconstruct axis if needed
4446+
new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()
4447+
4448+
if ignore_index:
4449+
axis = 1 if isinstance(self, ABCDataFrame) else 0
4450+
new_data.axes[axis] = ibase.default_index(len(indexer))
4451+
4452+
result = self._constructor(new_data)
4453+
4454+
if inplace:
4455+
return self._update_inplace(result)
4456+
else:
4457+
return result.__finalize__(self, method="sort_index")
4458+
44124459
@doc(
44134460
klass=_shared_doc_kwargs["klass"],
44144461
axes=_shared_doc_kwargs["axes"],

pandas/core/indexes/base.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -2250,7 +2250,7 @@ def fillna(self, value=None, downcast=None):
22502250
DataFrame.fillna : Fill NaN values of a DataFrame.
22512251
Series.fillna : Fill NaN Values of a Series.
22522252
"""
2253-
self._assert_can_do_op(value)
2253+
value = self._validate_scalar(value)
22542254
if self.hasnans:
22552255
result = self.putmask(self._isnan, value)
22562256
if downcast is None:
@@ -4053,12 +4053,14 @@ def _validate_fill_value(self, value):
40534053
"""
40544054
return value
40554055

4056-
def _assert_can_do_op(self, value):
4056+
def _validate_scalar(self, value):
40574057
"""
4058-
Check value is valid for scalar op.
4058+
Check that this is a scalar value that we can use for setitem-like
4059+
operations without changing dtype.
40594060
"""
40604061
if not is_scalar(value):
40614062
raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
4063+
return value
40624064

40634065
@property
40644066
def _has_complex_internals(self) -> bool:

0 commit comments

Comments
 (0)