Skip to content

Commit d2e6dc2

Browse files
jbrockmendel authored and proost committed
CLN: annotation in reshape.merge (pandas-dev#29490)
1 parent 74109bc commit d2e6dc2

File tree

1 file changed

+66
-48
lines changed

1 file changed

+66
-48
lines changed

pandas/core/reshape/merge.py

+66-48
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import datetime
77
from functools import partial
88
import string
9+
from typing import TYPE_CHECKING, Optional, Tuple, Union
910
import warnings
1011

1112
import numpy as np
@@ -39,29 +40,33 @@
3940
from pandas.core.dtypes.missing import isna, na_value_for_dtype
4041

4142
from pandas import Categorical, Index, MultiIndex
43+
from pandas._typing import FrameOrSeries
4244
import pandas.core.algorithms as algos
4345
from pandas.core.arrays.categorical import _recode_for_categories
4446
import pandas.core.common as com
4547
from pandas.core.frame import _merge_doc
4648
from pandas.core.internals import _transform_index, concatenate_block_managers
4749
from pandas.core.sorting import is_int64_overflow_possible
4850

51+
if TYPE_CHECKING:
52+
from pandas import DataFrame, Series # noqa:F401
53+
4954

5055
@Substitution("\nleft : DataFrame")
5156
@Appender(_merge_doc, indents=0)
5257
def merge(
5358
left,
5459
right,
55-
how="inner",
60+
how: str = "inner",
5661
on=None,
5762
left_on=None,
5863
right_on=None,
59-
left_index=False,
60-
right_index=False,
61-
sort=False,
64+
left_index: bool = False,
65+
right_index: bool = False,
66+
sort: bool = False,
6267
suffixes=("_x", "_y"),
63-
copy=True,
64-
indicator=False,
68+
copy: bool = True,
69+
indicator: bool = False,
6570
validate=None,
6671
):
6772
op = _MergeOperation(
@@ -86,7 +91,9 @@ def merge(
8691
merge.__doc__ = _merge_doc % "\nleft : DataFrame"
8792

8893

89-
def _groupby_and_merge(by, on, left, right, _merge_pieces, check_duplicates=True):
94+
def _groupby_and_merge(
95+
by, on, left, right, _merge_pieces, check_duplicates: bool = True
96+
):
9097
"""
9198
groupby & merge; we are always performing a left-by type operation
9299
@@ -172,7 +179,7 @@ def merge_ordered(
172179
right_by=None,
173180
fill_method=None,
174181
suffixes=("_x", "_y"),
175-
how="outer",
182+
how: str = "outer",
176183
):
177184
"""
178185
Perform merge with optional filling/interpolation.
@@ -298,14 +305,14 @@ def merge_asof(
298305
on=None,
299306
left_on=None,
300307
right_on=None,
301-
left_index=False,
302-
right_index=False,
308+
left_index: bool = False,
309+
right_index: bool = False,
303310
by=None,
304311
left_by=None,
305312
right_by=None,
306313
suffixes=("_x", "_y"),
307314
tolerance=None,
308-
allow_exact_matches=True,
315+
allow_exact_matches: bool = True,
309316
direction="backward",
310317
):
311318
"""
@@ -533,33 +540,33 @@ def merge_asof(
533540
# TODO: only copy DataFrames when modification necessary
534541
class _MergeOperation:
535542
"""
536-
Perform a database (SQL) merge operation between two DataFrame objects
537-
using either columns as keys or their row indexes
543+
Perform a database (SQL) merge operation between two DataFrame or Series
544+
objects using either columns as keys or their row indexes
538545
"""
539546

540547
_merge_type = "merge"
541548

542549
def __init__(
543550
self,
544-
left,
545-
right,
546-
how="inner",
551+
left: Union["Series", "DataFrame"],
552+
right: Union["Series", "DataFrame"],
553+
how: str = "inner",
547554
on=None,
548555
left_on=None,
549556
right_on=None,
550557
axis=1,
551-
left_index=False,
552-
right_index=False,
553-
sort=True,
558+
left_index: bool = False,
559+
right_index: bool = False,
560+
sort: bool = True,
554561
suffixes=("_x", "_y"),
555-
copy=True,
556-
indicator=False,
562+
copy: bool = True,
563+
indicator: bool = False,
557564
validate=None,
558565
):
559-
left = validate_operand(left)
560-
right = validate_operand(right)
561-
self.left = self.orig_left = left
562-
self.right = self.orig_right = right
566+
_left = _validate_operand(left)
567+
_right = _validate_operand(right)
568+
self.left = self.orig_left = _validate_operand(_left) # type: "DataFrame"
569+
self.right = self.orig_right = _validate_operand(_right) # type: "DataFrame"
563570
self.how = how
564571
self.axis = axis
565572

@@ -577,7 +584,7 @@ def __init__(
577584
self.indicator = indicator
578585

579586
if isinstance(self.indicator, str):
580-
self.indicator_name = self.indicator
587+
self.indicator_name = self.indicator # type: Optional[str]
581588
elif isinstance(self.indicator, bool):
582589
self.indicator_name = "_merge" if self.indicator else None
583590
else:
@@ -597,11 +604,11 @@ def __init__(
597604
)
598605

599606
# warn user when merging between different levels
600-
if left.columns.nlevels != right.columns.nlevels:
607+
if _left.columns.nlevels != _right.columns.nlevels:
601608
msg = (
602609
"merging between different levels can give an unintended "
603610
"result ({left} levels on the left, {right} on the right)"
604-
).format(left=left.columns.nlevels, right=right.columns.nlevels)
611+
).format(left=_left.columns.nlevels, right=_right.columns.nlevels)
605612
warnings.warn(msg, UserWarning)
606613

607614
self._validate_specification()
@@ -658,7 +665,9 @@ def get_result(self):
658665

659666
return result
660667

661-
def _indicator_pre_merge(self, left, right):
668+
def _indicator_pre_merge(
669+
self, left: "DataFrame", right: "DataFrame"
670+
) -> Tuple["DataFrame", "DataFrame"]:
662671

663672
columns = left.columns.union(right.columns)
664673

@@ -878,7 +887,12 @@ def _get_join_info(self):
878887
return join_index, left_indexer, right_indexer
879888

880889
def _create_join_index(
881-
self, index, other_index, indexer, other_indexer, how="left"
890+
self,
891+
index: Index,
892+
other_index: Index,
893+
indexer,
894+
other_indexer,
895+
how: str = "left",
882896
):
883897
"""
884898
Create a join index by rearranging one index to match another
@@ -1263,7 +1277,9 @@ def _validate(self, validate: str):
12631277
raise ValueError("Not a valid argument for validate")
12641278

12651279

1266-
def _get_join_indexers(left_keys, right_keys, sort=False, how="inner", **kwargs):
1280+
def _get_join_indexers(
1281+
left_keys, right_keys, sort: bool = False, how: str = "inner", **kwargs
1282+
):
12671283
"""
12681284
12691285
Parameters
@@ -1410,13 +1426,13 @@ def __init__(
14101426
on=None,
14111427
left_on=None,
14121428
right_on=None,
1413-
left_index=False,
1414-
right_index=False,
1429+
left_index: bool = False,
1430+
right_index: bool = False,
14151431
axis=1,
14161432
suffixes=("_x", "_y"),
1417-
copy=True,
1433+
copy: bool = True,
14181434
fill_method=None,
1419-
how="outer",
1435+
how: str = "outer",
14201436
):
14211437

14221438
self.fill_method = fill_method
@@ -1508,18 +1524,18 @@ def __init__(
15081524
on=None,
15091525
left_on=None,
15101526
right_on=None,
1511-
left_index=False,
1512-
right_index=False,
1527+
left_index: bool = False,
1528+
right_index: bool = False,
15131529
by=None,
15141530
left_by=None,
15151531
right_by=None,
15161532
axis=1,
15171533
suffixes=("_x", "_y"),
1518-
copy=True,
1534+
copy: bool = True,
15191535
fill_method=None,
1520-
how="asof",
1536+
how: str = "asof",
15211537
tolerance=None,
1522-
allow_exact_matches=True,
1538+
allow_exact_matches: bool = True,
15231539
direction="backward",
15241540
):
15251541

@@ -1757,13 +1773,15 @@ def flip(xs):
17571773
return func(left_values, right_values, self.allow_exact_matches, tolerance)
17581774

17591775

1760-
def _get_multiindex_indexer(join_keys, index, sort):
1776+
def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):
17611777

17621778
# bind `sort` argument
17631779
fkeys = partial(_factorize_keys, sort=sort)
17641780

17651781
# left & right join labels and num. of levels at each location
1766-
rcodes, lcodes, shape = map(list, zip(*map(fkeys, index.levels, join_keys)))
1782+
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels)))
1783+
zipped = zip(*mapped)
1784+
rcodes, lcodes, shape = [list(x) for x in zipped]
17671785
if sort:
17681786
rcodes = list(map(np.take, rcodes, index.codes))
17691787
else:
@@ -1791,7 +1809,7 @@ def _get_multiindex_indexer(join_keys, index, sort):
17911809
return libjoin.left_outer_join(lkey, rkey, count, sort=sort)
17921810

17931811

1794-
def _get_single_indexer(join_key, index, sort=False):
1812+
def _get_single_indexer(join_key, index, sort: bool = False):
17951813
left_key, right_key, count = _factorize_keys(join_key, index, sort=sort)
17961814

17971815
left_indexer, right_indexer = libjoin.left_outer_join(
@@ -1801,7 +1819,7 @@ def _get_single_indexer(join_key, index, sort=False):
18011819
return left_indexer, right_indexer
18021820

18031821

1804-
def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):
1822+
def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = False):
18051823
if len(join_keys) > 1:
18061824
if not (
18071825
(isinstance(right_ax, MultiIndex) and len(join_keys) == right_ax.nlevels)
@@ -1915,7 +1933,7 @@ def _factorize_keys(lk, rk, sort=True):
19151933
return llab, rlab, count
19161934

19171935

1918-
def _sort_labels(uniques, left, right):
1936+
def _sort_labels(uniques: np.ndarray, left, right):
19191937
if not isinstance(uniques, np.ndarray):
19201938
# tuplesafe
19211939
uniques = Index(uniques).values
@@ -1930,7 +1948,7 @@ def _sort_labels(uniques, left, right):
19301948
return new_left, new_right
19311949

19321950

1933-
def _get_join_keys(llab, rlab, shape, sort):
1951+
def _get_join_keys(llab, rlab, shape, sort: bool):
19341952

19351953
# how many levels can be done without overflow
19361954
pred = lambda i: not is_int64_overflow_possible(shape[:i])
@@ -1970,7 +1988,7 @@ def _any(x) -> bool:
19701988
return x is not None and com.any_not_none(*x)
19711989

19721990

1973-
def validate_operand(obj):
1991+
def _validate_operand(obj: FrameOrSeries) -> "DataFrame":
19741992
if isinstance(obj, ABCDataFrame):
19751993
return obj
19761994
elif isinstance(obj, ABCSeries):
@@ -1985,7 +2003,7 @@ def validate_operand(obj):
19852003
)
19862004

19872005

1988-
def _items_overlap_with_suffix(left, lsuffix, right, rsuffix):
2006+
def _items_overlap_with_suffix(left: Index, lsuffix, right: Index, rsuffix):
19892007
"""
19902008
If two indices overlap, add suffixes to overlapping entries.
19912009

0 commit comments

Comments
 (0)