Skip to content

Commit 3b51f71

Browse files
authored
rename get_column_by_name to col (#290)
* rename get_column_by_name to col
* type DataFrame, Column, and GroupBy as Protocol
* add py.typed
* iterable argument in select
* update tpch q1
1 parent fc12020 commit 3b51f71

File tree

7 files changed

+18
-22
lines changed

7 files changed

+18
-22
lines changed

spec/API_specification/dataframe_api/dataframe_object.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def group_by(self, *keys: str) -> GroupBy:
9292
"""
9393
...
9494

95-
def get_column_by_name(self, name: str, /) -> Column:
95+
def col(self, name: str, /) -> Column:
9696
"""
9797
Select a column by name.
9898
@@ -195,7 +195,7 @@ def assign(self, *columns: Column) -> Self:
195195
196196
.. code-block:: python
197197
198-
new_column = df.get_column_by_name('a') + 1
198+
new_column = df.col('a') + 1
199199
df = df.assign(new_column.rename('b'))
200200
201201
Parameters

spec/API_specification/dataframe_api/py.typed

Whitespace-only changes.

spec/API_specification/examples/01_standardise_columns.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def my_dataframe_agnostic_function(df_non_standard: SupportsDataFrameAPI) -> Any
1111
for column_name in df.column_names:
1212
if column_name == 'species':
1313
continue
14-
new_column = df.get_column_by_name(column_name)
14+
new_column = df.col(column_name)
1515
new_column = (new_column - new_column.mean()) / new_column.std()
1616
df = df.assign(new_column.rename(f'{column_name}_scaled'))
1717

spec/API_specification/examples/02_plotting.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def group_by_and_plot(
2525
)
2626

2727
agg = df.group_by("color").mean().fill_null(float('nan'))
28-
x = agg.get_column_by_name("x").to_array()
29-
y = agg.get_column_by_name("y").to_array()
28+
x = agg.col("x").to_array()
29+
y = agg.col("y").to_array()
3030

3131
my_plotting_function(x, y)

spec/API_specification/examples/tpch/q1.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@ def query(lineitem_raw: SupportsDataFrameAPI) -> Any:
88
lineitem = lineitem_raw.__dataframe_consortium_standard__()
99
namespace = lineitem.__dataframe_namespace__()
1010

11-
mask = lineitem.get_column_by_name("l_shipdate") <= namespace.date(1998, 9, 2)
11+
mask = lineitem.col("l_shipdate") <= namespace.date(1998, 9, 2)
1212
lineitem = lineitem.assign(
1313
(
14-
lineitem.get_column_by_name("l_extended_price")
15-
* (1 - lineitem.get_column_by_name("l_discount"))
14+
lineitem.col("l_extended_price")
15+
* (1 - lineitem.col("l_discount"))
1616
).rename("l_disc_price"),
1717
(
18-
lineitem.get_column_by_name("l_extended_price")
19-
* (1 - lineitem.get_column_by_name("l_discount"))
20-
* (1 + lineitem.get_column_by_name("l_tax"))
18+
lineitem.col("l_extended_price")
19+
* (1 - lineitem.col("l_discount"))
20+
* (1 + lineitem.col("l_tax"))
2121
).rename("l_charge"),
2222
)
2323
result = (

spec/API_specification/examples/tpch/q5.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,15 @@ def query(
5353
)
5454
)
5555
mask = (
56-
(
57-
result.get_column_by_name("c_nationkey")
58-
== result.get_column_by_name("s_nationkey")
59-
)
60-
& (result.get_column_by_name("r_name") == "ASIA")
61-
& (result.get_column_by_name("o_orderdate") >= namespace.date(1994, 1, 1))
62-
& (result.get_column_by_name("o_orderdate") < namespace.date(1995, 1, 1))
56+
(result.col("c_nationkey") == result.col("s_nationkey"))
57+
& (result.col("r_name") == "ASIA")
58+
& (result.col("o_orderdate") >= namespace.date(1994, 1, 1))
59+
& (result.col("o_orderdate") < namespace.date(1995, 1, 1))
6360
)
6461
result = result.filter(mask)
6562

6663
new_column = (
67-
result.get_column_by_name("l_extendedprice")
68-
* (1 - result.get_column_by_name("l_discount"))
64+
result.col("l_extendedprice") * (1 - result.col("l_discount"))
6965
).rename("revenue")
7066
result = result.assign(new_column)
7167
result = result.group_by("n_name").aggregate(namespace.Aggregation.sum("revenue"))

spec/design_topics/python_builtin_types.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ the `float` it is documented to return, in combination with the `__gt__` method
1414
class DataFrame:
1515
def __gt__(self, other: DataFrame | Scalar) -> DataFrame:
1616
...
17-
def get_column_by_name(self, name: str, /) -> Column:
17+
def col(self, name: str, /) -> Column:
1818
...
1919

2020
class Column:
2121
def mean(self, skip_nulls: bool = True) -> float | NullType:
2222
...
2323

24-
larger = df2 > df1.get_column_by_name('foo').mean()
24+
larger = df2 > df1.col('foo').mean()
2525
```
2626

2727
For a GPU dataframe library, it is desirable for all data to reside on the GPU,

0 commit comments

Comments
 (0)