@@ -264,6 +264,9 @@ def left_join_indexer_unique(
264
264
ndarray[numeric_object_t] left ,
265
265
ndarray[numeric_object_t] right
266
266
):
267
+ """
268
+ Both left and right are strictly monotonic increasing.
269
+ """
267
270
cdef:
268
271
Py_ssize_t i, j, nleft, nright
269
272
ndarray[intp_t] indexer
@@ -311,6 +314,9 @@ def left_join_indexer_unique(
311
314
def left_join_indexer (ndarray[numeric_object_t] left , ndarray[numeric_object_t] right ):
312
315
"""
313
316
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
317
+
318
+ Both left and right are monotonic increasing, but at least one of them
319
+ is non-unique (if both were unique we'd use left_join_indexer_unique).
314
320
"""
315
321
cdef:
316
322
Py_ssize_t i, j, k, nright, nleft, count
@@ -321,6 +327,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
321
327
nleft = len (left)
322
328
nright = len (right)
323
329
330
+ # First pass is to find the size 'count' of our output indexers.
324
331
i = 0
325
332
j = 0
326
333
count = 0
@@ -334,6 +341,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
334
341
rval = right[j]
335
342
336
343
if lval == rval:
344
+ # This block is identical across
345
+ # left_join_indexer, inner_join_indexer, outer_join_indexer
337
346
count += 1
338
347
if i < nleft - 1 :
339
348
if j < nright - 1 and right[j + 1 ] == rval:
@@ -398,12 +407,14 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
398
407
# end of the road
399
408
break
400
409
elif lval < rval:
410
+ # i.e. lval not in right; we keep for left_join_indexer
401
411
lindexer[count] = i
402
412
rindexer[count] = - 1
403
- result[count] = left[i]
413
+ result[count] = lval
404
414
count += 1
405
415
i += 1
406
416
else :
417
+ # i.e. rval not in left; we discard for left_join_indexer
407
418
j += 1
408
419
409
420
return result, lindexer, rindexer
@@ -414,6 +425,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
414
425
def inner_join_indexer (ndarray[numeric_object_t] left , ndarray[numeric_object_t] right ):
415
426
"""
416
427
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
428
+
429
+ Both left and right are monotonic increasing but not necessarily unique.
417
430
"""
418
431
cdef:
419
432
Py_ssize_t i, j, k, nright, nleft, count
@@ -424,6 +437,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
424
437
nleft = len (left)
425
438
nright = len (right)
426
439
440
+ # First pass is to find the size 'count' of our output indexers.
427
441
i = 0
428
442
j = 0
429
443
count = 0
@@ -453,8 +467,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
453
467
# end of the road
454
468
break
455
469
elif lval < rval:
470
+ # i.e. lval not in right; we discard for inner_join_indexer
456
471
i += 1
457
472
else :
473
+ # i.e. rval not in left; we discard for inner_join_indexer
458
474
j += 1
459
475
460
476
# do it again now that result size is known
@@ -478,7 +494,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
478
494
if lval == rval:
479
495
lindexer[count] = i
480
496
rindexer[count] = j
481
- result[count] = rval
497
+ result[count] = lval
482
498
count += 1
483
499
if i < nleft - 1 :
484
500
if j < nright - 1 and right[j + 1 ] == rval:
@@ -495,8 +511,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
495
511
# end of the road
496
512
break
497
513
elif lval < rval:
514
+ # i.e. lval not in right; we discard for inner_join_indexer
498
515
i += 1
499
516
else :
517
+ # i.e. rval not in left; we discard for inner_join_indexer
500
518
j += 1
501
519
502
520
return result, lindexer, rindexer
@@ -505,6 +523,9 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
505
523
@ cython.wraparound (False )
506
524
@ cython.boundscheck (False )
507
525
def outer_join_indexer (ndarray[numeric_object_t] left , ndarray[numeric_object_t] right ):
526
+ """
527
+ Both left and right are monotonic increasing but not necessarily unique.
528
+ """
508
529
cdef:
509
530
Py_ssize_t i, j, nright, nleft, count
510
531
numeric_object_t lval, rval
@@ -514,6 +535,9 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
514
535
nleft = len (left)
515
536
nright = len (right)
516
537
538
+ # First pass is to find the size 'count' of our output indexers.
539
+ # count will be length of left plus the number of elements of right not in
540
+ # left (counting duplicates)
517
541
i = 0
518
542
j = 0
519
543
count = 0
@@ -616,12 +640,14 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
616
640
# end of the road
617
641
break
618
642
elif lval < rval:
643
+ # i.e. lval not in right; we keep for outer_join_indexer
619
644
lindexer[count] = i
620
645
rindexer[count] = - 1
621
646
result[count] = lval
622
647
count += 1
623
648
i += 1
624
649
else :
650
+ # i.e. rval not in left; we keep for outer_join_indexer
625
651
lindexer[count] = - 1
626
652
rindexer[count] = j
627
653
result[count] = rval
0 commit comments