Skip to content

Commit e2a18d4

Browse files
authored
Add TPC-H query 5 to examples (#285)
* add tpch q5 example
* simplify
* post rebase fixup
* add original query
1 parent f12057f commit e2a18d4

File tree

3 files changed

+77
-2
lines changed

3 files changed

+77
-2
lines changed

spec/API_specification/dataframe_api/dataframe_object.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if TYPE_CHECKING:
77
from .column_object import Column
88
from .groupby_object import GroupBy
9-
from .typing import NullType, Scalar, Namespace, DType
9+
from .typing import NullType, Scalar, Namespace, DType, SupportsDataFrameAPI
1010

1111

1212
__all__ = ["DataFrame"]
@@ -51,7 +51,7 @@ def __dataframe_namespace__(self) -> Namespace:
5151
"""
5252

5353
@property
54-
def dataframe(self) -> object:
54+
def dataframe(self) -> SupportsDataFrameAPI:
5555
"""
5656
Return underlying (not-necessarily-Standard-compliant) DataFrame.
5757
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""
2+
Original query:
3+
4+
SELECT n_name, SUM(l_extendedprice * (1 - l_discount)) AS revenue
5+
FROM customer, orders, lineitem, supplier, nation, region
6+
WHERE c_custkey = o_custkey
7+
AND l_orderkey = o_orderkey
8+
AND l_suppkey = s_suppkey
9+
AND c_nationkey = s_nationkey
10+
AND s_nationkey = n_nationkey
11+
AND n_regionkey = r_regionkey
12+
AND r_name = 'ASIA'
13+
AND o_orderdate >= MDY(1,1,1994)
14+
AND o_orderdate < MDY(1,1,1994) + 1 UNITS YEAR
15+
GROUP BY n_name
16+
ORDER BY revenue DESC
17+
"""
18+
from __future__ import annotations
19+
20+
from typing import TYPE_CHECKING
21+
22+
if TYPE_CHECKING:
23+
from dataframe_api.typing import SupportsDataFrameAPI
24+
25+
26+
def query(
    customer_raw: SupportsDataFrameAPI,
    orders_raw: SupportsDataFrameAPI,
    lineitem_raw: SupportsDataFrameAPI,
    supplier_raw: SupportsDataFrameAPI,
    nation_raw: SupportsDataFrameAPI,
    region_raw: SupportsDataFrameAPI,
) -> SupportsDataFrameAPI:
    """
    Run TPC-H query 5 using the dataframe API standard.

    Each ``*_raw`` argument is converted to a Standard-compliant dataframe via
    ``__dataframe_consortium_standard__()``. The six tables are inner-joined,
    restricted to region ``'ASIA'`` and to orders dated in
    ``[1994-01-01, 1995-01-01)``, and ``l_extendedprice * (1 - l_discount)``
    is summed per ``n_name`` as ``revenue``. Rows are then sorted by
    ``revenue`` in descending order, matching the ``ORDER BY revenue DESC``
    clause of the original SQL.

    Returns
    -------
    SupportsDataFrameAPI
        The result's ``.dataframe`` attribute, with columns ``n_name`` and
        ``revenue``.
    """
    customer = customer_raw.__dataframe_consortium_standard__()
    orders = orders_raw.__dataframe_consortium_standard__()
    lineitem = lineitem_raw.__dataframe_consortium_standard__()
    supplier = supplier_raw.__dataframe_consortium_standard__()
    nation = nation_raw.__dataframe_consortium_standard__()
    region = region_raw.__dataframe_consortium_standard__()

    # The namespace provides the date scalars used in the order-date filter.
    namespace = customer.__dataframe_namespace__()

    # FROM customer, orders, lineitem, supplier, nation, region, with the
    # key-equality predicates of the WHERE clause expressed as inner joins.
    result = (
        region.join(nation, how="inner", left_on="r_regionkey", right_on="n_regionkey")
        .join(customer, how="inner", left_on="n_nationkey", right_on="c_nationkey")
        .join(orders, how="inner", left_on="c_custkey", right_on="o_custkey")
        .join(lineitem, how="inner", left_on="o_orderkey", right_on="l_orderkey")
        .join(
            supplier,
            how="inner",
            left_on=["l_suppkey", "n_nationkey"],
            right_on=["s_suppkey", "s_nationkey"],
        )
    )

    # Remaining WHERE predicates. The order-date window is half-open:
    # MDY(1,1,1994) <= o_orderdate < MDY(1,1,1994) + 1 year.
    mask = (
        (
            result.get_column_by_name("c_nationkey")
            == result.get_column_by_name("s_nationkey")
        )
        & (result.get_column_by_name("r_name") == "ASIA")
        & (result.get_column_by_name("o_orderdate") >= namespace.date(1994, 1, 1))  # type: ignore
        & (result.get_column_by_name("o_orderdate") < namespace.date(1995, 1, 1))  # type: ignore
    )
    result = result.filter(mask)

    # SUM(l_extendedprice * (1 - l_discount)) AS revenue ... GROUP BY n_name
    new_column = (
        result.get_column_by_name("l_extendedprice")
        * (1 - result.get_column_by_name("l_discount"))
    ).rename("revenue")
    result = result.assign(new_column)
    result = result.select(["revenue", "n_name"])
    result = result.group_by("n_name").sum()

    # ORDER BY revenue DESC -- without this the row order of the grouped
    # result would not reflect the original query.
    result = result.sort("revenue", ascending=False)

    return result.dataframe

spec/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
('py:class', 'optional'),
8787
('py:class', 'NullType'),
8888
('py:class', 'Namespace'),
89+
('py:class', 'SupportsDataFrameAPI'),
8990
]
9091
# NOTE: this alias handling isn't used yet - added in anticipation of future
9192
# need based on dataframe API aliases.

0 commit comments

Comments
 (0)