DOC: Added more examples to comparison_with_sql documentation

maxu777 · jreback · commit a61478864d29 · 2016-04-28T13:28:14.000-04:00
closes #12932
diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst
@@ -372,10 +372,109 @@ In pandas, you can use :meth:`~pandas.concat` in conjunction with
 
     pd.concat([df1, df2]).drop_duplicates()
 
+Pandas equivalents for some SQL analytic and aggregate functions
+----------------------------------------------------------------
+
+Top N rows with offset
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: sql
+
+    -- MySQL
+    SELECT * FROM tips
+    ORDER BY tip DESC
+    LIMIT 10 OFFSET 5;
+
+.. ipython:: python
+
+    tips.nlargest(10+5, columns='tip').tail(10)
+
+Top N rows per group
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: sql
+
+    -- Oracle's ROW_NUMBER() analytic function
+    SELECT * FROM (
+      SELECT
+        t.*,
+        ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn
+      FROM tips t
+    )
+    WHERE rn < 3
+    ORDER BY day, rn;
+
+
+.. ipython:: python
+
+    (tips.assign(rn=tips.sort_values(['total_bill'], ascending=False)
+                        .groupby(['day'])
+                        .cumcount() + 1)
+         .query('rn < 3')
+         .sort_values(['day','rn'])
+    )
+
+The same using the ``rank(method='first')`` function:
+
+.. ipython:: python
+
+    (tips.assign(rnk=tips.groupby(['day'])['total_bill']
+                         .rank(method='first', ascending=False))
+         .query('rnk < 3')
+         .sort_values(['day','rnk'])
+    )
+
+.. code-block:: sql
+
+    -- Oracle's RANK() analytic function
+    SELECT * FROM (
+      SELECT
+        t.*,
+        RANK() OVER(PARTITION BY sex ORDER BY tip) AS rnk
+      FROM tips t
+      WHERE tip < 2
+    )
+    WHERE rnk < 3
+    ORDER BY sex, rnk;
+
+Let's find tips with (rank < 3) per gender group for (tips < 2).
+Notice that when using the ``rank(method='min')`` function,
+``rnk_min`` remains the same for the same ``tip``
+(as with Oracle's ``RANK()`` function).
+
+.. ipython:: python
+
+    (tips[tips['tip'] < 2]
+         .assign(rnk_min=tips.groupby(['sex'])['tip']
+                             .rank(method='min'))
+         .query('rnk_min < 3')
+         .sort_values(['sex','rnk_min'])
+    )
+
 
 UPDATE
 ------
 
+.. code-block:: sql
+
+    UPDATE tips
+    SET tip = tip*2
+    WHERE tip < 2;
+
+.. ipython:: python
+
+    tips.loc[tips['tip'] < 2, 'tip'] *= 2
 
 DELETE
 ------
+
+.. code-block:: sql
+
+    DELETE FROM tips
+    WHERE tip > 9;
+
+In pandas we select the rows that should remain, instead of deleting them:
+
+.. ipython:: python
+
+    tips = tips.loc[tips['tip'] <= 9]