Skip to content

Commit 42f785f

Browse files
DOC: fix PR07 for pandas.merge (pandas-dev#58979)
1 parent 31c2de5 commit 42f785f

File tree

2 files changed

+205
-9
lines changed

2 files changed

+205
-9
lines changed

ci/code_checks.sh

-1
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
464464
-i "pandas.io.stata.StataReader.variable_labels RT03,SA01" \
465465
-i "pandas.io.stata.StataWriter.write_file SA01" \
466466
-i "pandas.json_normalize RT03,SA01" \
467-
-i "pandas.merge PR07" \
468467
-i "pandas.merge_asof PR07,RT03" \
469468
-i "pandas.period_range RT03,SA01" \
470469
-i "pandas.plotting.andrews_curves RT03,SA01" \

pandas/core/reshape/merge.py

+205-8
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,7 @@
3939
npt,
4040
)
4141
from pandas.errors import MergeError
42-
from pandas.util._decorators import (
43-
Appender,
44-
Substitution,
45-
cache_readonly,
46-
)
42+
from pandas.util._decorators import cache_readonly
4743
from pandas.util._exceptions import find_stack_level
4844

4945
from pandas.core.dtypes.base import ExtensionDtype
@@ -95,7 +91,6 @@
9591
ensure_wrapped_if_datetimelike,
9692
extract_array,
9793
)
98-
from pandas.core.frame import _merge_doc
9994
from pandas.core.indexes.api import default_index
10095
from pandas.core.sorting import (
10196
get_group_index,
@@ -133,8 +128,6 @@
133128
_known = (np.ndarray, ExtensionArray, Index, ABCSeries)
134129

135130

136-
@Substitution("\nleft : DataFrame or named Series")
137-
@Appender(_merge_doc, indents=0)
138131
def merge(
139132
left: DataFrame | Series,
140133
right: DataFrame | Series,
@@ -150,6 +143,210 @@ def merge(
150143
indicator: str | bool = False,
151144
validate: str | None = None,
152145
) -> DataFrame:
146+
"""
147+
Merge DataFrame or named Series objects with a database-style join.
148+
149+
A named Series object is treated as a DataFrame with a single named column.
150+
151+
The join is done on columns or indexes. If joining columns on
152+
columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes
153+
on indexes or indexes on a column or columns, the index will be passed on.
154+
When performing a cross merge, no column specifications to merge on are
155+
allowed.
156+
157+
.. warning::
158+
159+
If both key columns contain rows where the key is a null value, those
160+
rows will be matched against each other. This is different from usual SQL
161+
join behaviour and can lead to unexpected results.
162+
163+
Parameters
164+
----------
165+
left : DataFrame or named Series
166+
First pandas object to merge.
167+
right : DataFrame or named Series
168+
Second pandas object to merge.
169+
how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner'
170+
Type of merge to be performed.
171+
172+
* left: use only keys from left frame, similar to a SQL left outer join;
173+
preserve key order.
174+
* right: use only keys from right frame, similar to a SQL right outer join;
175+
preserve key order.
176+
* outer: use union of keys from both frames, similar to a SQL full outer
177+
join; sort keys lexicographically.
178+
* inner: use intersection of keys from both frames, similar to a SQL inner
179+
join; preserve the order of the left keys.
180+
* cross: create the cartesian product from both frames; preserve the order
181+
of the left keys.
182+
on : label or list
183+
Column or index level names to join on. These must be found in both
184+
DataFrames. If `on` is None and not merging on indexes then this defaults
185+
to the intersection of the columns in both DataFrames.
186+
left_on : label or list, or array-like
187+
Column or index level names to join on in the left DataFrame. Can also
188+
be an array or list of arrays of the length of the left DataFrame.
189+
These arrays are treated as if they are columns.
190+
right_on : label or list, or array-like
191+
Column or index level names to join on in the right DataFrame. Can also
192+
be an array or list of arrays of the length of the right DataFrame.
193+
These arrays are treated as if they are columns.
194+
left_index : bool, default False
195+
Use the index from the left DataFrame as the join key(s). If it is a
196+
MultiIndex, the number of keys in the other DataFrame (either the index
197+
or a number of columns) must match the number of levels.
198+
right_index : bool, default False
199+
Use the index from the right DataFrame as the join key(s). Same caveats as
200+
left_index.
201+
sort : bool, default False
202+
Sort the join keys lexicographically in the result DataFrame. If False,
203+
the order of the join keys depends on the join type (how keyword).
204+
suffixes : list-like, default ("_x", "_y")
205+
A length-2 sequence where each element is optionally a string
206+
indicating the suffix to add to overlapping column names in
207+
`left` and `right` respectively. Pass a value of `None` instead
208+
of a string to indicate that the column name from `left` or
209+
`right` should be left as-is, with no suffix. At least one of the
210+
values must not be None.
211+
copy : bool, default False
212+
If False, avoid copy if possible.
213+
214+
.. note::
215+
The `copy` keyword will change behavior in pandas 3.0.
216+
`Copy-on-Write
217+
<https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
218+
will be enabled by default, which means that all methods with a
219+
`copy` keyword will use a lazy copy mechanism to defer the copy and
220+
ignore the `copy` keyword. The `copy` keyword will be removed in a
221+
future version of pandas.
222+
223+
You can already get the future behavior and improvements through
224+
enabling copy on write: ``pd.options.mode.copy_on_write = True``.
225+
226+
.. deprecated:: 3.0.0
227+
indicator : bool or str, default False
228+
If True, adds a column to the output DataFrame called "_merge" with
229+
information on the source of each row. The column can be given a different
230+
name by providing a string argument. The column will have a Categorical
231+
type with the value of "left_only" for observations whose merge key only
232+
appears in the left DataFrame, "right_only" for observations
233+
whose merge key only appears in the right DataFrame, and "both"
234+
if the observation's merge key is found in both DataFrames.
235+
236+
validate : str, optional
237+
If specified, checks if merge is of specified type.
238+
239+
* "one_to_one" or "1:1": check if merge keys are unique in both
240+
left and right datasets.
241+
* "one_to_many" or "1:m": check if merge keys are unique in left
242+
dataset.
243+
* "many_to_one" or "m:1": check if merge keys are unique in right
244+
dataset.
245+
* "many_to_many" or "m:m": allowed, but does not result in checks.
246+
247+
Returns
248+
-------
249+
DataFrame
250+
A DataFrame of the two merged objects.
251+
252+
See Also
253+
--------
254+
merge_ordered : Merge with optional filling/interpolation.
255+
merge_asof : Merge on nearest keys.
256+
DataFrame.join : Similar method using indices.
257+
258+
Examples
259+
--------
260+
>>> df1 = pd.DataFrame(
261+
... {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}
262+
... )
263+
>>> df2 = pd.DataFrame(
264+
... {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]}
265+
... )
266+
>>> df1
267+
lkey value
268+
0 foo 1
269+
1 bar 2
270+
2 baz 3
271+
3 foo 5
272+
>>> df2
273+
rkey value
274+
0 foo 5
275+
1 bar 6
276+
2 baz 7
277+
3 foo 8
278+
279+
Merge df1 and df2 on the lkey and rkey columns. The value columns have
280+
the default suffixes, _x and _y, appended.
281+
282+
>>> df1.merge(df2, left_on="lkey", right_on="rkey")
283+
lkey value_x rkey value_y
284+
0 foo 1 foo 5
285+
1 foo 1 foo 8
286+
2 bar 2 bar 6
287+
3 baz 3 baz 7
288+
4 foo 5 foo 5
289+
5 foo 5 foo 8
290+
291+
Merge DataFrames df1 and df2 with specified left and right suffixes
292+
appended to any overlapping columns.
293+
294+
>>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=("_left", "_right"))
295+
lkey value_left rkey value_right
296+
0 foo 1 foo 5
297+
1 foo 1 foo 8
298+
2 bar 2 bar 6
299+
3 baz 3 baz 7
300+
4 foo 5 foo 5
301+
5 foo 5 foo 8
302+
303+
Merge DataFrames df1 and df2, but raise an exception if the DataFrames have
304+
any overlapping columns.
305+
306+
>>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=(False, False))
307+
Traceback (most recent call last):
308+
...
309+
ValueError: columns overlap but no suffix specified:
310+
Index(['value'], dtype='object')
311+
312+
>>> df1 = pd.DataFrame({"a": ["foo", "bar"], "b": [1, 2]})
313+
>>> df2 = pd.DataFrame({"a": ["foo", "baz"], "c": [3, 4]})
314+
>>> df1
315+
a b
316+
0 foo 1
317+
1 bar 2
318+
>>> df2
319+
a c
320+
0 foo 3
321+
1 baz 4
322+
323+
>>> df1.merge(df2, how="inner", on="a")
324+
a b c
325+
0 foo 1 3
326+
327+
>>> df1.merge(df2, how="left", on="a")
328+
a b c
329+
0 foo 1 3.0
330+
1 bar 2 NaN
331+
332+
>>> df1 = pd.DataFrame({"left": ["foo", "bar"]})
333+
>>> df2 = pd.DataFrame({"right": [7, 8]})
334+
>>> df1
335+
left
336+
0 foo
337+
1 bar
338+
>>> df2
339+
right
340+
0 7
341+
1 8
342+
343+
>>> df1.merge(df2, how="cross")
344+
left right
345+
0 foo 7
346+
1 foo 8
347+
2 bar 7
348+
3 bar 8
349+
"""
153350
left_df = _validate_operand(left)
154351
left._check_copy_deprecation(copy)
155352
right_df = _validate_operand(right)

0 commit comments

Comments
 (0)