39
39
npt ,
40
40
)
41
41
from pandas .errors import MergeError
42
- from pandas .util ._decorators import (
43
- Appender ,
44
- Substitution ,
45
- cache_readonly ,
46
- )
42
+ from pandas .util ._decorators import cache_readonly
47
43
from pandas .util ._exceptions import find_stack_level
48
44
49
45
from pandas .core .dtypes .base import ExtensionDtype
95
91
ensure_wrapped_if_datetimelike ,
96
92
extract_array ,
97
93
)
98
- from pandas .core .frame import _merge_doc
99
94
from pandas .core .indexes .api import default_index
100
95
from pandas .core .sorting import (
101
96
get_group_index ,
133
128
_known = (np .ndarray , ExtensionArray , Index , ABCSeries )
134
129
135
130
136
- @Substitution ("\n left : DataFrame or named Series" )
137
- @Appender (_merge_doc , indents = 0 )
138
131
def merge (
139
132
left : DataFrame | Series ,
140
133
right : DataFrame | Series ,
@@ -150,6 +143,210 @@ def merge(
150
143
indicator : str | bool = False ,
151
144
validate : str | None = None ,
152
145
) -> DataFrame :
146
+ """
147
+ Merge DataFrame or named Series objects with a database-style join.
148
+
149
+ A named Series object is treated as a DataFrame with a single named column.
150
+
151
+ The join is done on columns or indexes. If joining columns on
152
+ columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes
153
+ on indexes or indexes on a column or columns, the index will be passed on.
154
+ When performing a cross merge, no column specifications to merge on are
155
+ allowed.
156
+
157
+ .. warning::
158
+
159
+ If both key columns contain rows where the key is a null value, those
160
+ rows will be matched against each other. This is different from the usual SQL
161
+ join behavior and can lead to unexpected results.
162
+
163
+ Parameters
164
+ ----------
165
+ left : DataFrame or named Series
166
+ First pandas object to merge.
167
+ right : DataFrame or named Series
168
+ Second pandas object to merge.
169
+ how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner'
170
+ Type of merge to be performed.
171
+
172
+ * left: use only keys from left frame, similar to a SQL left outer join;
173
+ preserve key order.
174
+ * right: use only keys from right frame, similar to a SQL right outer join;
175
+ preserve key order.
176
+ * outer: use union of keys from both frames, similar to a SQL full outer
177
+ join; sort keys lexicographically.
178
+ * inner: use intersection of keys from both frames, similar to a SQL inner
179
+ join; preserve the order of the left keys.
180
+ * cross: creates the cartesian product from both frames, preserves the order
181
+ of the left keys.
182
+ on : label or list
183
+ Column or index level names to join on. These must be found in both
184
+ DataFrames. If `on` is None and not merging on indexes then this defaults
185
+ to the intersection of the columns in both DataFrames.
186
+ left_on : label or list, or array-like
187
+ Column or index level names to join on in the left DataFrame. Can also
188
+ be an array or list of arrays of the length of the left DataFrame.
189
+ These arrays are treated as if they are columns.
190
+ right_on : label or list, or array-like
191
+ Column or index level names to join on in the right DataFrame. Can also
192
+ be an array or list of arrays of the length of the right DataFrame.
193
+ These arrays are treated as if they are columns.
194
+ left_index : bool, default False
195
+ Use the index from the left DataFrame as the join key(s). If it is a
196
+ MultiIndex, the number of keys in the other DataFrame (either the index
197
+ or a number of columns) must match the number of levels.
198
+ right_index : bool, default False
199
+ Use the index from the right DataFrame as the join key. Same caveats as
200
+ left_index.
201
+ sort : bool, default False
202
+ Sort the join keys lexicographically in the result DataFrame. If False,
203
+ the order of the join keys depends on the join type (how keyword).
204
+ suffixes : list-like, default ("_x", "_y")
205
+ A length-2 sequence where each element is optionally a string
206
+ indicating the suffix to add to overlapping column names in
207
+ `left` and `right` respectively. Pass a value of `None` instead
208
+ of a string to indicate that the column name from `left` or
209
+ `right` should be left as-is, with no suffix. At least one of the
210
+ values must not be None.
211
+ copy : bool, default False
212
+ If False, avoid copy if possible.
213
+
214
+ .. note::
215
+ The `copy` keyword will change behavior in pandas 3.0.
216
+ `Copy-on-Write
217
+ <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
218
+ will be enabled by default, which means that all methods with a
219
+ `copy` keyword will use a lazy copy mechanism to defer the copy and
220
+ ignore the `copy` keyword. The `copy` keyword will be removed in a
221
+ future version of pandas.
222
+
223
+ You can already get the future behavior and improvements through
224
+ enabling copy on write: ``pd.options.mode.copy_on_write = True``.
225
+
226
+ .. deprecated:: 3.0.0
227
+ indicator : bool or str, default False
228
+ If True, adds a column to the output DataFrame called "_merge" with
229
+ information on the source of each row. The column can be given a different
230
+ name by providing a string argument. The column will have a Categorical
231
+ type with the value of "left_only" for observations whose merge key only
232
+ appears in the left DataFrame, "right_only" for observations
233
+ whose merge key only appears in the right DataFrame, and "both"
234
+ if the observation's merge key is found in both DataFrames.
235
+
236
+ validate : str, optional
237
+ If specified, checks if merge is of specified type.
238
+
239
+ * "one_to_one" or "1:1": check if merge keys are unique in both
240
+ left and right datasets.
241
+ * "one_to_many" or "1:m": check if merge keys are unique in left
242
+ dataset.
243
+ * "many_to_one" or "m:1": check if merge keys are unique in right
244
+ dataset.
245
+ * "many_to_many" or "m:m": allowed, but does not result in checks.
246
+
247
+ Returns
248
+ -------
249
+ DataFrame
250
+ A DataFrame of the two merged objects.
251
+
252
+ See Also
253
+ --------
254
+ merge_ordered : Merge with optional filling/interpolation.
255
+ merge_asof : Merge on nearest keys.
256
+ DataFrame.join : Similar method using indices.
257
+
258
+ Examples
259
+ --------
260
+ >>> df1 = pd.DataFrame(
261
+ ... {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}
262
+ ... )
263
+ >>> df2 = pd.DataFrame(
264
+ ... {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]}
265
+ ... )
266
+ >>> df1
267
+ lkey value
268
+ 0 foo 1
269
+ 1 bar 2
270
+ 2 baz 3
271
+ 3 foo 5
272
+ >>> df2
273
+ rkey value
274
+ 0 foo 5
275
+ 1 bar 6
276
+ 2 baz 7
277
+ 3 foo 8
278
+
279
+ Merge df1 and df2 on the lkey and rkey columns. The value columns have
280
+ the default suffixes, _x and _y, appended.
281
+
282
+ >>> df1.merge(df2, left_on="lkey", right_on="rkey")
283
+ lkey value_x rkey value_y
284
+ 0 foo 1 foo 5
285
+ 1 foo 1 foo 8
286
+ 2 bar 2 bar 6
287
+ 3 baz 3 baz 7
288
+ 4 foo 5 foo 5
289
+ 5 foo 5 foo 8
290
+
291
+ Merge DataFrames df1 and df2 with specified left and right suffixes
292
+ appended to any overlapping columns.
293
+
294
+ >>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=("_left", "_right"))
295
+ lkey value_left rkey value_right
296
+ 0 foo 1 foo 5
297
+ 1 foo 1 foo 8
298
+ 2 bar 2 bar 6
299
+ 3 baz 3 baz 7
300
+ 4 foo 5 foo 5
301
+ 5 foo 5 foo 8
302
+
303
+ Merge DataFrames df1 and df2, but raise an exception if the DataFrames have
304
+ any overlapping columns.
305
+
306
+ >>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=(False, False))
307
+ Traceback (most recent call last):
308
+ ...
309
+ ValueError: columns overlap but no suffix specified:
310
+ Index(['value'], dtype='object')
311
+
312
+ >>> df1 = pd.DataFrame({"a": ["foo", "bar"], "b": [1, 2]})
313
+ >>> df2 = pd.DataFrame({"a": ["foo", "baz"], "c": [3, 4]})
314
+ >>> df1
315
+ a b
316
+ 0 foo 1
317
+ 1 bar 2
318
+ >>> df2
319
+ a c
320
+ 0 foo 3
321
+ 1 baz 4
322
+
323
+ >>> df1.merge(df2, how="inner", on="a")
324
+ a b c
325
+ 0 foo 1 3
326
+
327
+ >>> df1.merge(df2, how="left", on="a")
328
+ a b c
329
+ 0 foo 1 3.0
330
+ 1 bar 2 NaN
331
+
332
+ >>> df1 = pd.DataFrame({"left": ["foo", "bar"]})
333
+ >>> df2 = pd.DataFrame({"right": [7, 8]})
334
+ >>> df1
335
+ left
336
+ 0 foo
337
+ 1 bar
338
+ >>> df2
339
+ right
340
+ 0 7
341
+ 1 8
342
+
343
+ >>> df1.merge(df2, how="cross")
344
+ left right
345
+ 0 foo 7
346
+ 1 foo 8
347
+ 2 bar 7
348
+ 3 bar 8
349
+ """
153
350
left_df = _validate_operand (left )
154
351
left ._check_copy_deprecation (copy )
155
352
right_df = _validate_operand (right )
0 commit comments