Skip to content

CLN: address TODOs, FIXMEs #44258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,9 @@ def left_join_indexer_unique(
ndarray[numeric_object_t] left,
ndarray[numeric_object_t] right
):
"""
Both left and right are strictly monotonic increasing.
"""
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[intp_t] indexer
Expand Down Expand Up @@ -311,6 +314,9 @@ def left_join_indexer_unique(
def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.

Both left and right are monotonic increasing, but at least one of them
is non-unique (if both were unique we'd use left_join_indexer_unique).
"""
cdef:
Py_ssize_t i, j, k, nright, nleft, count
Expand All @@ -321,6 +327,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
i = 0
j = 0
count = 0
Expand All @@ -334,6 +341,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
rval = right[j]

if lval == rval:
# This block is identical across
# left_join_indexer, inner_join_indexer, outer_join_indexer
count += 1
if i < nleft - 1:
if j < nright - 1 and right[j + 1] == rval:
Expand Down Expand Up @@ -398,12 +407,14 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we keep for left_join_indexer
lindexer[count] = i
rindexer[count] = -1
result[count] = left[i]
result[count] = lval
count += 1
i += 1
else:
# i.e. rval not in left; we discard for left_join_indexer
j += 1

return result, lindexer, rindexer
Expand All @@ -414,6 +425,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.

Both left and right are monotonic increasing but not necessarily unique.
"""
cdef:
Py_ssize_t i, j, k, nright, nleft, count
Expand All @@ -424,6 +437,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
i = 0
j = 0
count = 0
Expand Down Expand Up @@ -453,8 +467,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we discard for inner_join_indexer
i += 1
else:
# i.e. rval not in left; we discard for inner_join_indexer
j += 1

# do it again now that result size is known
Expand All @@ -478,7 +494,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
if lval == rval:
lindexer[count] = i
rindexer[count] = j
result[count] = rval
result[count] = lval
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change? Did it not break anything?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lval == rval here; this just makes this block match what's done elsewhere.

count += 1
if i < nleft - 1:
if j < nright - 1 and right[j + 1] == rval:
Expand All @@ -495,8 +511,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we discard for inner_join_indexer
i += 1
else:
# i.e. rval not in left; we discard for inner_join_indexer
j += 1

return result, lindexer, rindexer
Expand All @@ -505,6 +523,9 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
@cython.wraparound(False)
@cython.boundscheck(False)
def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Both left and right are monotonic increasing but not necessarily unique.
"""
cdef:
Py_ssize_t i, j, nright, nleft, count
numeric_object_t lval, rval
Expand All @@ -514,6 +535,9 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
# count will be length of left plus the number of elements of right not in
# left (counting duplicates)
i = 0
j = 0
count = 0
Expand Down Expand Up @@ -616,12 +640,14 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we keep for outer_join_indexer
lindexer[count] = i
rindexer[count] = -1
result[count] = lval
count += 1
i += 1
else:
# i.e. rval not in left; we keep for outer_join_indexer
lindexer[count] = -1
rindexer[count] = j
result[count] = rval
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:
@cython.wraparound(False)
@cython.boundscheck(False)
def get_start_end_field(const int64_t[:] dtindex, str field,
object freqstr=None, int month_kw=12):
str freqstr=None, int month_kw=12):
"""
Given an int64-based datetime index return array of indicators
of whether timestamps are at the start/end of the month/quarter/year
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike
from pandas._typing import (
ArrayLike,
npt,
)

from pandas.core.dtypes.cast import (
convert_scalar_for_putitemlike,
Expand All @@ -26,13 +29,14 @@
from pandas.core.arrays import ExtensionArray


def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
"""
ExtensionArray-compatible implementation of np.putmask. The main
difference is we do not handle repeating or truncating like numpy.

Parameters
----------
values : np.ndarray or ExtensionArray
mask : np.ndarray[bool]
We assume extract_bool_array has already been called.
value : Any
Expand All @@ -51,6 +55,7 @@ def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
)
):
# GH#19266 using np.putmask gives unexpected results with listlike value
# along with object dtype
if is_list_like(value) and len(value) == len(values):
values[mask] = value[mask]
else:
Expand Down
1 change: 0 additions & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1259,7 +1259,6 @@ def __from_arrow__(
return IntervalArray._concat_same_type(results)

def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# NB: this doesn't handle checking for closed match
if not all(isinstance(x, IntervalDtype) for x in dtypes):
return None

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,14 @@ def is_scalar_indexer(indexer, ndim: int) -> bool:
return False


def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
def is_empty_indexer(indexer, arr_value: ArrayLike) -> bool:
"""
Check if we have an empty indexer.

Parameters
----------
indexer : object
arr_value : np.ndarray
arr_value : np.ndarray or ExtensionArray

Returns
-------
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3123,7 +3123,9 @@ def _union(self, other: Index, sort):
and not (self.has_duplicates and other.has_duplicates)
and self._can_use_libjoin
):
# Both are unique and monotonic, so can use outer join
# Both are monotonic and at least one is unique, so can use outer join
# (actually don't need either unique, but without this restriction
# test_union_same_value_duplicated_in_both fails)
try:
return self._outer_indexer(other)[0]
except (TypeError, IncompatibleFrequency):
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,7 +918,7 @@ def setitem(self, indexer, value):
check_setitem_lengths(indexer, value, values)

if is_empty_indexer(indexer, arr_value):
# GH#8669 empty indexers
# GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse
pass

elif is_scalar_indexer(indexer, self.ndim):
Expand Down Expand Up @@ -1698,7 +1698,7 @@ def putmask(self, mask, new) -> list[Block]:
mask = extract_bool_array(mask)

if not self._can_hold_element(new):
return self.astype(_dtype_obj).putmask(mask, new)
return self.coerce_to_target_dtype(new).putmask(mask, new)

arr = self.values
arr.T.putmask(mask, new)
Expand Down Expand Up @@ -1755,7 +1755,9 @@ def fillna(
# We support filling a DatetimeTZ with a `value` whose timezone
# is different by coercing to object.
# TODO: don't special-case td64
return self.astype(_dtype_obj).fillna(value, limit, inplace, downcast)
return self.coerce_to_target_dtype(value).fillna(
value, limit, inplace, downcast
)

values = self.values
values = values if inplace else values.copy()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -2075,7 +2075,7 @@ def test_td64arr_div_numeric_array(
with pytest.raises(TypeError, match=pattern):
vector.astype(object) / tdser

def test_td64arr_mul_int_series(self, box_with_array, names, request):
def test_td64arr_mul_int_series(self, box_with_array, names):
# GH#19042 test for correct name attachment
box = box_with_array
exname = get_expected_name(box, names)
Expand Down