Skip to content

Commit e8d3136

Browse files
authored
CLN: address TODOs, FIXMEs (#44258)
1 parent cb83977 commit e8d3136

File tree

8 files changed

+47
-13
lines changed

8 files changed

+47
-13
lines changed

pandas/_libs/join.pyx

+28-2
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,9 @@ def left_join_indexer_unique(
264264
ndarray[numeric_object_t] left,
265265
ndarray[numeric_object_t] right
266266
):
267+
"""
268+
Both left and right are strictly monotonic increasing.
269+
"""
267270
cdef:
268271
Py_ssize_t i, j, nleft, nright
269272
ndarray[intp_t] indexer
@@ -311,6 +314,9 @@ def left_join_indexer_unique(
311314
def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
312315
"""
313316
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
317+
318+
Both left and right are monotonic increasing, but at least one of them
319+
is non-unique (if both were unique we'd use left_join_indexer_unique).
314320
"""
315321
cdef:
316322
Py_ssize_t i, j, k, nright, nleft, count
@@ -321,6 +327,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
321327
nleft = len(left)
322328
nright = len(right)
323329

330+
# First pass is to find the size 'count' of our output indexers.
324331
i = 0
325332
j = 0
326333
count = 0
@@ -334,6 +341,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
334341
rval = right[j]
335342

336343
if lval == rval:
344+
# This block is identical across
345+
# left_join_indexer, inner_join_indexer, outer_join_indexer
337346
count += 1
338347
if i < nleft - 1:
339348
if j < nright - 1 and right[j + 1] == rval:
@@ -398,12 +407,14 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
398407
# end of the road
399408
break
400409
elif lval < rval:
410+
# i.e. lval not in right; we keep for left_join_indexer
401411
lindexer[count] = i
402412
rindexer[count] = -1
403-
result[count] = left[i]
413+
result[count] = lval
404414
count += 1
405415
i += 1
406416
else:
417+
# i.e. rval not in left; we discard for left_join_indexer
407418
j += 1
408419

409420
return result, lindexer, rindexer
@@ -414,6 +425,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
414425
def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
415426
"""
416427
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
428+
429+
Both left and right are monotonic increasing but not necessarily unique.
417430
"""
418431
cdef:
419432
Py_ssize_t i, j, k, nright, nleft, count
@@ -424,6 +437,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
424437
nleft = len(left)
425438
nright = len(right)
426439

440+
# First pass is to find the size 'count' of our output indexers.
427441
i = 0
428442
j = 0
429443
count = 0
@@ -453,8 +467,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
453467
# end of the road
454468
break
455469
elif lval < rval:
470+
# i.e. lval not in right; we discard for inner_join_indexer
456471
i += 1
457472
else:
473+
# i.e. rval not in left; we discard for inner_join_indexer
458474
j += 1
459475

460476
# do it again now that result size is known
@@ -478,7 +494,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
478494
if lval == rval:
479495
lindexer[count] = i
480496
rindexer[count] = j
481-
result[count] = rval
497+
result[count] = lval
482498
count += 1
483499
if i < nleft - 1:
484500
if j < nright - 1 and right[j + 1] == rval:
@@ -495,8 +511,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
495511
# end of the road
496512
break
497513
elif lval < rval:
514+
# i.e. lval not in right; we discard for inner_join_indexer
498515
i += 1
499516
else:
517+
# i.e. rval not in left; we discard for inner_join_indexer
500518
j += 1
501519

502520
return result, lindexer, rindexer
@@ -505,6 +523,9 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
505523
@cython.wraparound(False)
506524
@cython.boundscheck(False)
507525
def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
526+
"""
527+
Both left and right are monotonic increasing but not necessarily unique.
528+
"""
508529
cdef:
509530
Py_ssize_t i, j, nright, nleft, count
510531
numeric_object_t lval, rval
@@ -514,6 +535,9 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
514535
nleft = len(left)
515536
nright = len(right)
516537

538+
# First pass is to find the size 'count' of our output indexers.
539+
# count will be length of left plus the number of elements of right not in
540+
# left (counting duplicates)
517541
i = 0
518542
j = 0
519543
count = 0
@@ -616,12 +640,14 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
616640
# end of the road
617641
break
618642
elif lval < rval:
643+
# i.e. lval not in right; we keep for outer_join_indexer
619644
lindexer[count] = i
620645
rindexer[count] = -1
621646
result[count] = lval
622647
count += 1
623648
i += 1
624649
else:
650+
# i.e. rval not in left; we keep for outer_join_indexer
625651
lindexer[count] = -1
626652
rindexer[count] = j
627653
result[count] = rval

pandas/_libs/tslibs/fields.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:
198198
@cython.wraparound(False)
199199
@cython.boundscheck(False)
200200
def get_start_end_field(const int64_t[:] dtindex, str field,
201-
object freqstr=None, int month_kw=12):
201+
str freqstr=None, int month_kw=12):
202202
"""
203203
Given an int64-based datetime index return array of indicators
204204
of whether timestamps are at the start/end of the month/quarter/year

pandas/core/array_algos/putmask.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
import numpy as np
1010

1111
from pandas._libs import lib
12-
from pandas._typing import ArrayLike
12+
from pandas._typing import (
13+
ArrayLike,
14+
npt,
15+
)
1316

1417
from pandas.core.dtypes.cast import (
1518
convert_scalar_for_putitemlike,
@@ -26,13 +29,14 @@
2629
from pandas.core.arrays import ExtensionArray
2730

2831

29-
def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
32+
def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
3033
"""
3134
ExtensionArray-compatible implementation of np.putmask. The main
3235
difference is we do not handle repeating or truncating like numpy.
3336
3437
Parameters
3538
----------
39+
values : np.ndarray or ExtensionArray
3640
mask : np.ndarray[bool]
3741
We assume extract_bool_array has already been called.
3842
value : Any
@@ -51,6 +55,7 @@ def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
5155
)
5256
):
5357
# GH#19266 using np.putmask gives unexpected results with listlike value
58+
# along with object dtype
5459
if is_list_like(value) and len(value) == len(values):
5560
values[mask] = value[mask]
5661
else:

pandas/core/dtypes/dtypes.py

-1
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,6 @@ def __from_arrow__(
12591259
return IntervalArray._concat_same_type(results)
12601260

12611261
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
1262-
# NB: this doesn't handle checking for closed match
12631262
if not all(isinstance(x, IntervalDtype) for x in dtypes):
12641263
return None
12651264

pandas/core/indexers/utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,14 @@ def is_scalar_indexer(indexer, ndim: int) -> bool:
104104
return False
105105

106106

107-
def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
107+
def is_empty_indexer(indexer, arr_value: ArrayLike) -> bool:
108108
"""
109109
Check if we have an empty indexer.
110110
111111
Parameters
112112
----------
113113
indexer : object
114-
arr_value : np.ndarray
114+
arr_value : np.ndarray or ExtensionArray
115115
116116
Returns
117117
-------

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3123,7 +3123,9 @@ def _union(self, other: Index, sort):
31233123
and not (self.has_duplicates and other.has_duplicates)
31243124
and self._can_use_libjoin
31253125
):
3126-
# Both are unique and monotonic, so can use outer join
3126+
# Both are monotonic and at least one is unique, so can use outer join
3127+
# (actually don't need either unique, but without this restriction
3128+
# test_union_same_value_duplicated_in_both fails)
31273129
try:
31283130
return self._outer_indexer(other)[0]
31293131
except (TypeError, IncompatibleFrequency):

pandas/core/internals/blocks.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ def setitem(self, indexer, value):
918918
check_setitem_lengths(indexer, value, values)
919919

920920
if is_empty_indexer(indexer, arr_value):
921-
# GH#8669 empty indexers
921+
# GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse
922922
pass
923923

924924
elif is_scalar_indexer(indexer, self.ndim):
@@ -1698,7 +1698,7 @@ def putmask(self, mask, new) -> list[Block]:
16981698
mask = extract_bool_array(mask)
16991699

17001700
if not self._can_hold_element(new):
1701-
return self.astype(_dtype_obj).putmask(mask, new)
1701+
return self.coerce_to_target_dtype(new).putmask(mask, new)
17021702

17031703
arr = self.values
17041704
arr.T.putmask(mask, new)
@@ -1755,7 +1755,9 @@ def fillna(
17551755
# We support filling a DatetimeTZ with a `value` whose timezone
17561756
# is different by coercing to object.
17571757
# TODO: don't special-case td64
1758-
return self.astype(_dtype_obj).fillna(value, limit, inplace, downcast)
1758+
return self.coerce_to_target_dtype(value).fillna(
1759+
value, limit, inplace, downcast
1760+
)
17591761

17601762
values = self.values
17611763
values = values if inplace else values.copy()

pandas/tests/arithmetic/test_timedelta64.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2075,7 +2075,7 @@ def test_td64arr_div_numeric_array(
20752075
with pytest.raises(TypeError, match=pattern):
20762076
vector.astype(object) / tdser
20772077

2078-
def test_td64arr_mul_int_series(self, box_with_array, names, request):
2078+
def test_td64arr_mul_int_series(self, box_with_array, names):
20792079
# GH#19042 test for correct name attachment
20802080
box = box_with_array
20812081
exname = get_expected_name(box, names)

0 commit comments

Comments
 (0)