@@ -223,7 +223,14 @@ def merge_ordered(
223
223
224
224
Examples
225
225
--------
226
- >>> A
226
+ >>> df1 = pd.DataFrame(
227
+ ... {
228
+ ... "key": ["a", "c", "e", "a", "c", "e"],
229
+ ... "lvalue": [1, 2, 3, 1, 2, 3],
230
+ ... "group": ["a", "a", "a", "b", "b", "b"]
231
+ ... }
232
+ ... )
233
+ >>> df1
227
234
key lvalue group
228
235
0 a 1 a
229
236
1 c 2 a
@@ -232,24 +239,25 @@ def merge_ordered(
232
239
4 c 2 b
233
240
5 e 3 b
234
241
235
- >>> B
236
- Key rvalue
237
- 0 b 1
238
- 1 c 2
239
- 2 d 3
240
-
241
- >>> merge_ordered(A, B, fill_method='ffill', left_by='group')
242
- group key lvalue rvalue
243
- 0 a a 1 NaN
244
- 1 a b 1 1.0
245
- 2 a c 2 2.0
246
- 3 a d 2 3.0
247
- 4 a e 3 3.0
248
- 5 b a 1 NaN
249
- 6 b b 1 1.0
250
- 7 b c 2 2.0
251
- 8 b d 2 3.0
252
- 9 b e 3 3.0
242
+ >>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
243
+ >>> df2
244
+ key rvalue
245
+ 0 b 1
246
+ 1 c 2
247
+ 2 d 3
248
+
249
+ >>> merge_ordered(df1, df2, fill_method="ffill", left_by="group")
250
+ key lvalue group rvalue
251
+ 0 a 1 a NaN
252
+ 1 b 1 a 1.0
253
+ 2 c 2 a 2.0
254
+ 3 d 2 a 3.0
255
+ 4 e 3 a 3.0
256
+ 5 a 1 b NaN
257
+ 6 b 1 b 1.0
258
+ 7 c 2 b 2.0
259
+ 8 d 2 b 3.0
260
+ 9 e 3 b 3.0
253
261
"""
254
262
255
263
def _merger (x , y ):
@@ -370,15 +378,14 @@ def merge_asof(
370
378
371
379
Examples
372
380
--------
373
- >>> left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']})
381
+ >>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
374
382
>>> left
375
383
a left_val
376
384
0 1 a
377
385
1 5 b
378
386
2 10 c
379
387
380
- >>> right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
381
- ... 'right_val': [1, 2, 3, 6, 7]})
388
+ >>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
382
389
>>> right
383
390
a right_val
384
391
0 1 1
@@ -387,41 +394,40 @@ def merge_asof(
387
394
3 6 6
388
395
4 7 7
389
396
390
- >>> pd.merge_asof(left, right, on='a')
397
+ >>> pd.merge_asof(left, right, on="a")
391
398
a left_val right_val
392
399
0 1 a 1
393
400
1 5 b 3
394
401
2 10 c 7
395
402
396
- >>> pd.merge_asof(left, right, on='a', allow_exact_matches=False)
403
+ >>> pd.merge_asof(left, right, on="a", allow_exact_matches=False)
397
404
a left_val right_val
398
405
0 1 a NaN
399
406
1 5 b 3.0
400
407
2 10 c 7.0
401
408
402
- >>> pd.merge_asof(left, right, on='a', direction='forward')
409
+ >>> pd.merge_asof(left, right, on="a", direction="forward")
403
410
a left_val right_val
404
411
0 1 a 1.0
405
412
1 5 b 6.0
406
413
2 10 c NaN
407
414
408
- >>> pd.merge_asof(left, right, on='a', direction='nearest')
415
+ >>> pd.merge_asof(left, right, on="a", direction="nearest")
409
416
a left_val right_val
410
417
0 1 a 1
411
418
1 5 b 6
412
419
2 10 c 7
413
420
414
421
We can use indexed DataFrames as well.
415
422
416
- >>> left = pd.DataFrame({'left_val': ['a', 'b', 'c']}, index=[1, 5, 10])
423
+ >>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10])
417
424
>>> left
418
425
left_val
419
426
1 a
420
427
5 b
421
428
10 c
422
429
423
- >>> right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7]},
424
- ... index=[1, 2, 3, 6, 7])
430
+ >>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])
425
431
>>> right
426
432
right_val
427
433
1 1
@@ -438,6 +444,32 @@ def merge_asof(
438
444
439
445
Here is a real-world time-series example
440
446
447
+ >>> quotes = pd.DataFrame(
448
+ ... {
449
+ ... "time": [
450
+ ... pd.Timestamp("2016-05-25 13:30:00.023"),
451
+ ... pd.Timestamp("2016-05-25 13:30:00.023"),
452
+ ... pd.Timestamp("2016-05-25 13:30:00.030"),
453
+ ... pd.Timestamp("2016-05-25 13:30:00.041"),
454
+ ... pd.Timestamp("2016-05-25 13:30:00.048"),
455
+ ... pd.Timestamp("2016-05-25 13:30:00.049"),
456
+ ... pd.Timestamp("2016-05-25 13:30:00.072"),
457
+ ... pd.Timestamp("2016-05-25 13:30:00.075")
458
+ ... ],
459
+ ... "ticker": [
460
+ ... "GOOG",
461
+ ... "MSFT",
462
+ ... "MSFT",
463
+ ... "MSFT",
464
+ ... "GOOG",
465
+ ... "AAPL",
466
+ ... "GOOG",
467
+ ... "MSFT"
468
+ ... ],
469
+ ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
470
+ ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]
471
+ ... }
472
+ ... )
441
473
>>> quotes
442
474
time ticker bid ask
443
475
0 2016-05-25 13:30:00.023 GOOG 720.50 720.93
@@ -449,6 +481,20 @@ def merge_asof(
449
481
6 2016-05-25 13:30:00.072 GOOG 720.50 720.88
450
482
7 2016-05-25 13:30:00.075 MSFT 52.01 52.03
451
483
484
+ >>> trades = pd.DataFrame(
485
+ ... {
486
+ ... "time": [
487
+ ... pd.Timestamp("2016-05-25 13:30:00.023"),
488
+ ... pd.Timestamp("2016-05-25 13:30:00.038"),
489
+ ... pd.Timestamp("2016-05-25 13:30:00.048"),
490
+ ... pd.Timestamp("2016-05-25 13:30:00.048"),
491
+ ... pd.Timestamp("2016-05-25 13:30:00.048")
492
+ ... ],
493
+ ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
494
+ ... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
495
+ ... "quantity": [75, 155, 100, 100, 100]
496
+ ... }
497
+ ... )
452
498
>>> trades
453
499
time ticker price quantity
454
500
0 2016-05-25 13:30:00.023 MSFT 51.95 75
@@ -459,9 +505,7 @@ def merge_asof(
459
505
460
506
By default we are taking the asof of the quotes
461
507
462
- >>> pd.merge_asof(trades, quotes,
463
- ... on='time',
464
- ... by='ticker')
508
+ >>> pd.merge_asof(trades, quotes, on="time", by="ticker")
465
509
time ticker price quantity bid ask
466
510
0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96
467
511
1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98
@@ -471,10 +515,9 @@ def merge_asof(
471
515
472
516
We only asof within 2ms between the quote time and the trade time
473
517
474
- >>> pd.merge_asof(trades, quotes,
475
- ... on='time',
476
- ... by='ticker',
477
- ... tolerance=pd.Timedelta('2ms'))
518
+ >>> pd.merge_asof(
519
+ ... trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms")
520
+ ... )
478
521
time ticker price quantity bid ask
479
522
0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96
480
523
1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN
@@ -486,11 +529,14 @@ def merge_asof(
486
529
and we exclude exact matches on time. However *prior* data will
487
530
propagate forward
488
531
489
- >>> pd.merge_asof(trades, quotes,
490
- ... on='time',
491
- ... by='ticker',
492
- ... tolerance=pd.Timedelta('10ms'),
493
- ... allow_exact_matches=False)
532
+ >>> pd.merge_asof(
533
+ ... trades,
534
+ ... quotes,
535
+ ... on="time",
536
+ ... by="ticker",
537
+ ... tolerance=pd.Timedelta("10ms"),
538
+ ... allow_exact_matches=False
539
+ ... )
494
540
time ticker price quantity bid ask
495
541
0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN
496
542
1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98
0 commit comments