@@ -372,10 +372,109 @@ In pandas, you can use :meth:`~pandas.concat` in conjunction with
372
372
373
373
pd.concat([df1, df2]).drop_duplicates()
374
374
375
+ Pandas equivalents for some SQL analytic and aggregate functions
376
+ ----------------------------------------------------------------
377
+
378
+ Top N rows with offset
379
+ ~~~~~~~~~~~~~~~~~~~~~~
380
+
381
+ .. code-block:: sql
382
+
383
+ -- MySQL
384
+ SELECT * FROM tips
385
+ ORDER BY tip DESC
386
+ LIMIT 10 OFFSET 5;
387
+
388
+ .. ipython:: python
389
+
390
+ tips.nlargest(10 + 5 , columns = ' tip' ).tail(10 )
391
+
392
+ Top N rows per group
393
+ ~~~~~~~~~~~~~~~~~~~~
394
+
395
+ .. code-block:: sql
396
+
397
+ -- Oracle's ROW_NUMBER() analytic function
398
+ SELECT * FROM (
399
+ SELECT
400
+ t.*,
401
+ ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn
402
+ FROM tips t
403
+ )
404
+ WHERE rn < 3
405
+ ORDER BY day, rn;
406
+
407
+
408
+ .. ipython:: python
409
+
410
+ (tips.assign(rn = tips.sort_values([' total_bill' ], ascending = False )
411
+ .groupby([' day' ])
412
+ .cumcount() + 1 )
413
+ .query(' rn < 3' )
414
+ .sort_values([' day' ,' rn' ])
415
+ )
416
+
417
+ The same result using the ``rank(method='first')`` function:
418
+
419
+ .. ipython:: python
420
+
421
+ (tips.assign(rnk = tips.groupby([' day' ])[' total_bill' ]
422
+ .rank(method = ' first' , ascending = False ))
423
+ .query(' rnk < 3' )
424
+ .sort_values([' day' ,' rnk' ])
425
+ )
426
+
427
+ .. code-block:: sql
428
+
429
+ -- Oracle's RANK() analytic function
430
+ SELECT * FROM (
431
+ SELECT
432
+ t.*,
433
+ RANK() OVER(PARTITION BY sex ORDER BY tip) AS rnk
434
+ FROM tips t
435
+ WHERE tip < 2
436
+ )
437
+ WHERE rnk < 3
438
+ ORDER BY sex, rnk;
439
+
440
+ Let's find the tips with rank < 3 per gender group, considering only tips where tip < 2.
441
+ Notice that when using the ``rank(method='min')`` function,
442
+ ``rnk_min`` remains the same for the same ``tip``
443
+ (as with Oracle's RANK() function).
444
+
445
+ .. ipython:: python
446
+
447
+ (tips[tips[' tip' ] < 2 ]
448
+ .assign(rnk_min = tips.groupby([' sex' ])[' tip' ]
449
+ .rank(method = ' min' ))
450
+ .query(' rnk_min < 3' )
451
+ .sort_values([' sex' ,' rnk_min' ])
452
+ )
453
+
375
454
376
455
UPDATE
377
456
------
378
457
458
+ .. code-block:: sql
459
+
460
+ UPDATE tips
461
+ SET tip = tip*2
462
+ WHERE tip < 2;
463
+
464
+ .. ipython:: python
465
+
466
+ tips.loc[tips[' tip' ] < 2 , ' tip' ] *= 2
379
467
380
468
DELETE
381
469
------
470
+
471
+ .. code-block:: sql
472
+
473
+ DELETE FROM tips
474
+ WHERE tip > 9;
475
+
476
+ In pandas, we select the rows that should remain instead of deleting them:
477
+
478
+ .. ipython:: python
479
+
480
+ tips = tips.loc[tips[' tip' ] <= 9 ]
0 commit comments