@@ -3,7 +3,6 @@
 """
 from __future__ import annotations
 
-import copy as cp
 import datetime
 from functools import partial
 import string
@@ -13,6 +12,7 @@
     Literal,
     Sequence,
     cast,
+    final,
 )
 import uuid
 import warnings
@@ -655,8 +655,8 @@ class _MergeOperation:
     indicator: str | bool
     validate: str | None
     join_names: list[Hashable]
-    right_join_keys: list[AnyArrayLike]
-    left_join_keys: list[AnyArrayLike]
+    right_join_keys: list[ArrayLike]
+    left_join_keys: list[ArrayLike]
 
     def __init__(
         self,
@@ -743,6 +743,7 @@ def __init__(
         if validate is not None:
             self._validate(validate)
 
+    @final
     def _reindex_and_concat(
         self,
         join_index: Index,
@@ -821,12 +822,14 @@ def get_result(self, copy: bool | None = True) -> DataFrame:
 
         return result.__finalize__(self, method="merge")
 
+    @final
     def _maybe_drop_cross_column(
         self, result: DataFrame, cross_col: str | None
     ) -> None:
         if cross_col is not None:
             del result[cross_col]
 
+    @final
     @cache_readonly
     def _indicator_name(self) -> str | None:
         if isinstance(self.indicator, str):
@@ -838,6 +841,7 @@ def _indicator_name(self) -> str | None:
                 "indicator option can only accept boolean or string arguments"
             )
 
+    @final
     def _indicator_pre_merge(
         self, left: DataFrame, right: DataFrame
     ) -> tuple[DataFrame, DataFrame]:
@@ -865,6 +869,7 @@ def _indicator_pre_merge(
 
         return left, right
 
+    @final
     def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
         result["_left_indicator"] = result["_left_indicator"].fillna(0)
         result["_right_indicator"] = result["_right_indicator"].fillna(0)
@@ -880,6 +885,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
         result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
         return result
 
+    @final
     def _maybe_restore_index_levels(self, result: DataFrame) -> None:
         """
         Restore index levels specified as `on` parameters
@@ -923,11 +929,12 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
         if names_to_restore:
             result.set_index(names_to_restore, inplace=True)
 
+    @final
     def _maybe_add_join_keys(
         self,
         result: DataFrame,
-        left_indexer: np.ndarray | None,
-        right_indexer: np.ndarray | None,
+        left_indexer: npt.NDArray[np.intp] | None,
+        right_indexer: npt.NDArray[np.intp] | None,
     ) -> None:
         left_has_missing = None
         right_has_missing = None
@@ -1032,6 +1039,7 @@ def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]
             self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how
         )
 
+    @final
     def _get_join_info(
         self,
     ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
@@ -1093,6 +1101,7 @@ def _get_join_info(
             join_index = default_index(0).set_names(join_index.name)
         return join_index, left_indexer, right_indexer
 
+    @final
     def _create_join_index(
         self,
         index: Index,
@@ -1129,7 +1138,7 @@ def _create_join_index(
 
     def _get_merge_keys(
         self,
-    ) -> tuple[list[AnyArrayLike], list[AnyArrayLike], list[Hashable]]:
+    ) -> tuple[list[ArrayLike], list[ArrayLike], list[Hashable]]:
         """
         Note: has side effects (copy/delete key columns)
 
@@ -1145,8 +1154,8 @@ def _get_merge_keys(
         """
         # left_keys, right_keys entries can actually be anything listlike
         # with a 'dtype' attr
-        left_keys: list[AnyArrayLike] = []
-        right_keys: list[AnyArrayLike] = []
+        left_keys: list[ArrayLike] = []
+        right_keys: list[ArrayLike] = []
         join_names: list[Hashable] = []
         right_drop: list[Hashable] = []
         left_drop: list[Hashable] = []
@@ -1169,11 +1178,13 @@ def _get_merge_keys(
         # ugh, spaghetti re #733
         if _any(self.left_on) and _any(self.right_on):
             for lk, rk in zip(self.left_on, self.right_on):
+                lk = extract_array(lk, extract_numpy=True)
+                rk = extract_array(rk, extract_numpy=True)
                 if is_lkey(lk):
-                    lk = cast(AnyArrayLike, lk)
+                    lk = cast(ArrayLike, lk)
                     left_keys.append(lk)
                     if is_rkey(rk):
-                        rk = cast(AnyArrayLike, rk)
+                        rk = cast(ArrayLike, rk)
                         right_keys.append(rk)
                         join_names.append(None)  # what to do?
                     else:
@@ -1185,7 +1196,7 @@ def _get_merge_keys(
                             join_names.append(rk)
                         else:
                             # work-around for merge_asof(right_index=True)
-                            right_keys.append(right.index)
+                            right_keys.append(right.index._values)
                             join_names.append(right.index.name)
                 else:
                     if not is_rkey(rk):
@@ -1196,7 +1207,7 @@ def _get_merge_keys(
                             right_keys.append(right._get_label_or_level_values(rk))
                         else:
                             # work-around for merge_asof(right_index=True)
-                            right_keys.append(right.index)
+                            right_keys.append(right.index._values)
                         if lk is not None and lk == rk:  # FIXME: what about other NAs?
                             # avoid key upcast in corner case (length-0)
                             lk = cast(Hashable, lk)
@@ -1205,7 +1216,7 @@ def _get_merge_keys(
                         else:
                             left_drop.append(lk)
                     else:
-                        rk = cast(AnyArrayLike, rk)
+                        rk = cast(ArrayLike, rk)
                         right_keys.append(rk)
                         if lk is not None:
                             # Then we're either Hashable or a wrong-length arraylike,
@@ -1215,12 +1226,13 @@ def _get_merge_keys(
                                 join_names.append(lk)
                             else:
                                 # work-around for merge_asof(left_index=True)
-                                left_keys.append(left.index)
+                                left_keys.append(left.index._values)
                                 join_names.append(left.index.name)
         elif _any(self.left_on):
             for k in self.left_on:
                 if is_lkey(k):
-                    k = cast(AnyArrayLike, k)
+                    k = extract_array(k, extract_numpy=True)
+                    k = cast(ArrayLike, k)
                     left_keys.append(k)
                     join_names.append(None)
                 else:
@@ -1240,8 +1252,9 @@ def _get_merge_keys(
                 right_keys = [self.right.index._values]
         elif _any(self.right_on):
             for k in self.right_on:
+                k = extract_array(k, extract_numpy=True)
                 if is_rkey(k):
-                    k = cast(AnyArrayLike, k)
+                    k = cast(ArrayLike, k)
                     right_keys.append(k)
                     join_names.append(None)
                 else:
@@ -1268,6 +1281,7 @@ def _get_merge_keys(
 
         return left_keys, right_keys, join_names
 
+    @final
     def _maybe_coerce_merge_keys(self) -> None:
         # we have valid merges but we may have to further
         # coerce these if they are originally incompatible types
@@ -1432,6 +1446,7 @@ def _maybe_coerce_merge_keys(self) -> None:
                 self.right = self.right.copy()
                 self.right[name] = self.right[name].astype(typ)
 
+    @final
     def _create_cross_configuration(
         self, left: DataFrame, right: DataFrame
     ) -> tuple[DataFrame, DataFrame, JoinHow, str]:
@@ -1610,11 +1625,10 @@ def _validate(self, validate: str) -> None:
 
 
 def get_join_indexers(
-    left_keys: list[AnyArrayLike],
-    right_keys: list[AnyArrayLike],
+    left_keys: list[ArrayLike],
+    right_keys: list[ArrayLike],
     sort: bool = False,
     how: MergeHow | Literal["asof"] = "inner",
-    **kwargs,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
     """
 
@@ -1667,7 +1681,7 @@ def get_join_indexers(
 
     lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how)
     # preserve left frame order if how == 'left' and sort == False
-    kwargs = cp.copy(kwargs)
+    kwargs = {}
     if how in ("left", "right"):
         kwargs["sort"] = sort
     join_func = {
@@ -1812,8 +1826,8 @@ def get_result(self, copy: bool | None = True) -> DataFrame:
             self.left._info_axis, self.right._info_axis, self.suffixes
         )
 
-        left_join_indexer: np.ndarray | None
-        right_join_indexer: np.ndarray | None
+        left_join_indexer: npt.NDArray[np.intp] | None
+        right_join_indexer: npt.NDArray[np.intp] | None
 
         if self.fill_method == "ffill":
             if left_indexer is None:
@@ -1984,7 +1998,7 @@ def _validate_left_right_on(self, left_on, right_on):
 
     def _get_merge_keys(
         self,
-    ) -> tuple[list[AnyArrayLike], list[AnyArrayLike], list[Hashable]]:
+    ) -> tuple[list[ArrayLike], list[ArrayLike], list[Hashable]]:
         # note this function has side effects
         (left_join_keys, right_join_keys, join_names) = super()._get_merge_keys()
 
@@ -2016,8 +2030,7 @@ def _get_merge_keys(
         # validate tolerance; datetime.timedelta or Timedelta if we have a DTI
         if self.tolerance is not None:
             if self.left_index:
-                # Actually more specifically an Index
-                lt = cast(AnyArrayLike, self.left.index)
+                lt = self.left.index._values
             else:
                 lt = left_join_keys[-1]
 
@@ -2026,19 +2039,19 @@ def _get_merge_keys(
                 f"with type {repr(lt.dtype)}"
             )
 
-            if needs_i8_conversion(getattr(lt, "dtype", None)):
+            if needs_i8_conversion(lt.dtype):
                 if not isinstance(self.tolerance, datetime.timedelta):
                     raise MergeError(msg)
                 if self.tolerance < Timedelta(0):
                     raise MergeError("tolerance must be positive")
 
-            elif is_integer_dtype(lt):
+            elif is_integer_dtype(lt.dtype):
                 if not is_integer(self.tolerance):
                     raise MergeError(msg)
                 if self.tolerance < 0:
                     raise MergeError("tolerance must be positive")
 
-            elif is_float_dtype(lt):
+            elif is_float_dtype(lt.dtype):
                 if not is_number(self.tolerance):
                     raise MergeError(msg)
                 # error: Unsupported operand types for > ("int" and "Number")
@@ -2061,10 +2074,10 @@ def _get_merge_keys(
     def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
         """return the join indexers"""
 
-        def flip(xs: list[AnyArrayLike]) -> np.ndarray:
+        def flip(xs: list[ArrayLike]) -> np.ndarray:
             """unlike np.transpose, this returns an array of tuples"""
 
-            def injection(obj: AnyArrayLike):
+            def injection(obj: ArrayLike):
                 if not isinstance(obj.dtype, ExtensionDtype):
                     # ndarray
                     return obj
@@ -2212,11 +2225,11 @@ def injection(obj: AnyArrayLike):
 
 
 def _get_multiindex_indexer(
-    join_keys: list[AnyArrayLike], index: MultiIndex, sort: bool
+    join_keys: list[ArrayLike], index: MultiIndex, sort: bool
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
     # left & right join labels and num. of levels at each location
     mapped = (
-        _factorize_keys(index.levels[n], join_keys[n], sort=sort)
+        _factorize_keys(index.levels[n]._values, join_keys[n], sort=sort)
         for n in range(index.nlevels)
     )
     zipped = zip(*mapped)
@@ -2249,7 +2262,7 @@ def _get_multiindex_indexer(
 
 
 def _get_single_indexer(
-    join_key: AnyArrayLike, index: Index, sort: bool = False
+    join_key: ArrayLike, index: Index, sort: bool = False
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
     left_key, right_key, count = _factorize_keys(join_key, index._values, sort=sort)
 
@@ -2294,7 +2307,7 @@ def _get_no_sort_one_missing_indexer(
 
 
 def _left_join_on_index(
-    left_ax: Index, right_ax: Index, join_keys: list[AnyArrayLike], sort: bool = False
+    left_ax: Index, right_ax: Index, join_keys: list[ArrayLike], sort: bool = False
 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]:
     if isinstance(right_ax, MultiIndex):
         left_indexer, right_indexer = _get_multiindex_indexer(
@@ -2315,8 +2328,8 @@ def _left_join_on_index(
 
 
 def _factorize_keys(
-    lk: AnyArrayLike,
-    rk: AnyArrayLike,
+    lk: ArrayLike,
+    rk: ArrayLike,
     sort: bool = True,
     how: MergeHow | Literal["asof"] = "inner",
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
@@ -2327,9 +2340,9 @@ def _factorize_keys(
 
     Parameters
     ----------
-    lk : ndarray, ExtensionArray, Index, or Series
+    lk : ndarray, ExtensionArray
         Left key.
-    rk : ndarray, ExtensionArray, Index, or Series
+    rk : ndarray, ExtensionArray
         Right key.
     sort : bool, defaults to True
         If True, the encoding is done such that the unique elements in the
@@ -2370,9 +2383,6 @@ def _factorize_keys(
     >>> pd.core.reshape.merge._factorize_keys(lk, rk, sort=False)
     (array([0, 1, 2]), array([0, 1]), 3)
     """
-    # Some pre-processing for non-ndarray lk / rk
-    lk = extract_array(lk, extract_numpy=True, extract_range=True)
-    rk = extract_array(rk, extract_numpy=True, extract_range=True)
     # TODO: if either is a RangeIndex, we can likely factorize more efficiently?
 
     if (