|
138 | 138 | """
|
139 | 139 |
|
140 | 140 | _merge_doc = """
|
141 |
| -Merge DataFrame or named Series objects by performing a database-style join |
142 |
| -operation by columns or indexes. |
| 141 | +Merge DataFrame or named Series objects with a database-style join. |
143 | 142 |
|
144 |
| -If joining columns on columns, the DataFrame indexes *will be |
145 |
| -ignored*. Otherwise if joining indexes on indexes or indexes on a column or |
146 |
| -columns, the index will be passed on. |
| 143 | +The join is done on columns or indexes. If joining columns on |
| 144 | +columns, the DataFrame indexes *will be ignored*. Otherwise, if joining indexes |
| 145 | +on indexes or indexes on a column or columns, the index will be passed on. |
147 | 146 |
|
148 | 147 | Parameters
|
149 | 148 | ----------%s
|
|
153 | 152 | Type of merge to be performed.
|
154 | 153 |
|
155 | 154 | * left: use only keys from left frame, similar to a SQL left outer join;
|
156 |
| - preserve key order |
| 155 | + preserve key order. |
157 | 156 | * right: use only keys from right frame, similar to a SQL right outer join;
|
158 |
| - preserve key order |
| 157 | + preserve key order. |
159 | 158 | * outer: use union of keys from both frames, similar to a SQL full outer
|
160 |
| - join; sort keys lexicographically |
| 159 | + join; sort keys lexicographically. |
161 | 160 | * inner: use intersection of keys from both frames, similar to a SQL inner
|
162 |
| - join; preserve the order of the left keys |
| 161 | + join; preserve the order of the left keys. |
163 | 162 | on : label or list
|
164 | 163 | Column or index level names to join on. These must be found in both
|
165 | 164 | DataFrames. If `on` is None and not merging on indexes then this defaults
|
|
172 | 171 | Column or index level names to join on in the right DataFrame. Can also
|
173 | 172 | be an array or list of arrays of the length of the right DataFrame.
|
174 | 173 | These arrays are treated as if they are columns.
|
175 |
| -left_index : boolean, default False |
| 174 | +left_index : bool, default False |
176 | 175 | Use the index from the left DataFrame as the join key(s). If it is a
|
177 | 176 | MultiIndex, the number of keys in the other DataFrame (either the index
|
178 | 177 | or a number of columns) must match the number of levels.
|
179 |
| -right_index : boolean, default False |
| 178 | +right_index : bool, default False |
180 | 179 | Use the index from the right DataFrame as the join key. Same caveats as
|
181 | 180 | left_index.
|
182 |
| -sort : boolean, default False |
| 181 | +sort : bool, default False |
183 | 182 | Sort the join keys lexicographically in the result DataFrame. If False,
|
184 | 183 | the order of the join keys depends on the join type (how keyword).
|
185 |
| -suffixes : 2-length sequence (tuple, list, ...) |
| 184 | +suffixes : tuple of (str, str), default ('_x', '_y') |
186 | 185 | Suffix to apply to overlapping column names in the left and right
|
187 |
| - side, respectively. |
188 |
| -copy : boolean, default True |
| 186 | + side, respectively. To raise an exception on overlapping columns, use |
| 187 | + (False, False). |
| 188 | +copy : bool, default True |
189 | 189 | If False, avoid copy if possible.
|
190 |
| -indicator : boolean or string, default False |
| 190 | +indicator : bool or str, default False |
191 | 191 | If True, adds a column to output DataFrame called "_merge" with
|
192 | 192 | information on the source of each row.
|
193 | 193 | If string, column with information on source of each row will be added to
|
|
197 | 197 | "right_only" for observations whose merge key only appears in 'right'
|
198 | 198 | DataFrame, and "both" if the observation's merge key is found in both.
|
199 | 199 |
|
200 |
| -validate : string, default None |
| 200 | +validate : str, optional |
201 | 201 | If specified, checks if merge is of specified type.
|
202 | 202 |
|
203 | 203 | * "one_to_one" or "1:1": check if merge keys are unique in both
|
|
213 | 213 | Returns
|
214 | 214 | -------
|
215 | 215 | DataFrame
|
| 216 | + A DataFrame of the two merged objects. |
216 | 217 |
|
217 | 218 | Notes
|
218 | 219 | -----
|
|
229 | 230 | Examples
|
230 | 231 | --------
|
231 | 232 |
|
232 |
| ->>> A = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], |
233 |
| -... 'value': [1, 2, 3, 5]}) |
234 |
| ->>> B = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], |
235 |
| -... 'value': [5, 6, 7, 8]}) |
236 |
| ->>> A |
| 233 | +>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], |
| 234 | +... 'value': [1, 2, 3, 5]}) |
| 235 | +>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], |
| 236 | +... 'value': [5, 6, 7, 8]}) |
| 237 | +>>> df1 |
237 | 238 | lkey value
|
238 | 239 | 0 foo 1
|
239 | 240 | 1 bar 2
|
240 | 241 | 2 baz 3
|
241 | 242 | 3 foo 5
|
242 |
| ->>> B |
| 243 | +>>> df2 |
243 | 244 | rkey value
|
244 | 245 | 0 foo 5
|
245 | 246 | 1 bar 6
|
246 | 247 | 2 baz 7
|
247 | 248 | 3 foo 8
|
248 | 249 |
|
249 |
| ->>> A.merge(B, left_on='lkey', right_on='rkey', how='outer') |
| 250 | +Merge df1 and df2 on the lkey and rkey columns. The value columns have |
| 251 | +the default suffixes, _x and _y, appended. |
| 252 | +
|
| 253 | +>>> df1.merge(df2, left_on='lkey', right_on='rkey') |
250 | 254 | lkey value_x rkey value_y
|
251 | 255 | 0 foo 1 foo 5
|
252 | 256 | 1 foo 1 foo 8
|
253 | 257 | 2 foo 5 foo 5
|
254 | 258 | 3 foo 5 foo 8
|
255 | 259 | 4 bar 2 bar 6
|
256 | 260 | 5 baz 3 baz 7
|
| 261 | +
|
| 262 | +Merge DataFrames df1 and df2 with specified left and right suffixes |
| 263 | +appended to any overlapping columns. |
| 264 | +
|
| 265 | +>>> df1.merge(df2, left_on='lkey', right_on='rkey', |
| 266 | +... suffixes=('_left', '_right')) |
| 267 | + lkey value_left rkey value_right |
| 268 | +0 foo 1 foo 5 |
| 269 | +1 foo 1 foo 8 |
| 270 | +2 foo 5 foo 5 |
| 271 | +3 foo 5 foo 8 |
| 272 | +4 bar 2 bar 6 |
| 273 | +5 baz 3 baz 7 |
| 274 | +
|
| 275 | +Merge DataFrames df1 and df2, but raise an exception if the DataFrames have |
| 276 | +any overlapping columns. |
| 277 | +
|
| 278 | +>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) |
| 279 | +Traceback (most recent call last): |
| 280 | +... |
| 281 | +ValueError: columns overlap but no suffix specified: |
| 282 | + Index(['value'], dtype='object') |
257 | 283 | """
|
258 | 284 |
|
259 | 285 | # -----------------------------------------------------------------------
|
|
0 commit comments