Skip to content

Commit 86b3867

Browse files
jorisvandenbossche, ildipo
authored and committed
apacheGH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (apache#34773)
### What changes are included in this PR?

I removed some of the declarations that are now no longer used (after the refactoring for apache#33976).

### Are there any user-facing changes?

No

* Issue: apache#33976

Lead-authored-by: Joris Van den Bossche <[email protected]>
Co-authored-by: Davide Pasetto <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
1 parent 2d53e62 commit 86b3867

File tree

2 files changed

+10
-53
lines changed

2 files changed

+10
-53
lines changed

python/pyarrow/includes/libarrow_acero.pxd

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,6 @@ cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
4646
CJoinType_RIGHT_OUTER "arrow::acero::JoinType::RIGHT_OUTER"
4747
CJoinType_FULL_OUTER "arrow::acero::JoinType::FULL_OUTER"
4848

49-
cdef cppclass CAsyncExecBatchGenerator "arrow::acero::AsyncExecBatchGenerator":
50-
pass
51-
5249
cdef cppclass CExecNodeOptions "arrow::acero::ExecNodeOptions":
5350
pass
5451

@@ -73,10 +70,6 @@ cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
7370
cdef cppclass CAggregateNodeOptions "arrow::acero::AggregateNodeOptions"(CExecNodeOptions):
7471
CAggregateNodeOptions(vector[CAggregate] aggregates, vector[CFieldRef] names)
7572

76-
cdef cppclass COrderBySinkNodeOptions "arrow::acero::OrderBySinkNodeOptions"(CExecNodeOptions):
77-
COrderBySinkNodeOptions(vector[CSortOptions] options,
78-
CAsyncExecBatchGenerator generator)
79-
8073
cdef cppclass COrderByNodeOptions "arrow::acero::OrderByNodeOptions"(CExecNodeOptions):
8174
COrderByNodeOptions(COrdering ordering)
8275

@@ -114,38 +107,10 @@ cdef extern from "arrow/acero/exec_plan.h" namespace "arrow::acero" nogil:
114107
@staticmethod
115108
CDeclaration Sequence(vector[CDeclaration] decls)
116109

117-
CResult[CExecNode*] AddToPlan(CExecPlan* plan) const
118-
119-
cdef cppclass CExecPlan "arrow::acero::ExecPlan":
120-
@staticmethod
121-
CResult[shared_ptr[CExecPlan]] Make(CExecContext* exec_context)
122-
123-
void StartProducing()
124-
CStatus Validate()
125-
CStatus StopProducing()
126-
127-
CFuture_Void finished()
128-
129-
vector[CExecNode*] sinks() const
130-
vector[CExecNode*] sources() const
131-
132110
cdef cppclass CExecNode "arrow::acero::ExecNode":
133111
const vector[CExecNode*]& inputs() const
134112
const shared_ptr[CSchema]& output_schema() const
135113

136-
cdef cppclass CExecBatch "arrow::acero::ExecBatch":
137-
vector[CDatum] values
138-
int64_t length
139-
140-
shared_ptr[CRecordBatchReader] MakeGeneratorReader(
141-
shared_ptr[CSchema] schema,
142-
CAsyncExecBatchGenerator gen,
143-
CMemoryPool* memory_pool
144-
)
145-
CResult[CExecNode*] MakeExecNode(c_string factory_name, CExecPlan* plan,
146-
vector[CExecNode*] inputs,
147-
const CExecNodeOptions& options)
148-
149114
CResult[shared_ptr[CTable]] DeclarationToTable(
150115
CDeclaration declaration, c_bool use_threads
151116
)

python/pyarrow/tests/test_acero.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
from pyarrow.compute import field
2323

2424
from pyarrow.acero import (
25-
TableSourceNodeOptions,
2625
Declaration,
26+
TableSourceNodeOptions,
2727
FilterNodeOptions,
2828
ProjectNodeOptions,
2929
AggregateNodeOptions,
30+
OrderByNodeOptions,
3031
HashJoinNodeOptions,
31-
OrderByNodeOptions
3232
)
3333

3434
try:
@@ -122,8 +122,7 @@ def test_project(table_source):
122122
# provide name
123123
decl = Declaration.from_sequence([
124124
table_source,
125-
Declaration("project", ProjectNodeOptions(
126-
[pc.multiply(field("a"), 2)], ["a2"]))
125+
Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2"]))
127126
])
128127
result = decl.to_table()
129128
assert result.schema.names == ["a2"]
@@ -145,8 +144,7 @@ def test_project(table_source):
145144
def test_aggregate_scalar(table_source):
146145
decl = Declaration.from_sequence([
147146
table_source,
148-
Declaration("aggregate", AggregateNodeOptions(
149-
[("a", "sum", None, "a_sum")]))
147+
Declaration("aggregate", AggregateNodeOptions([("a", "sum", None, "a_sum")]))
150148
])
151149
result = decl.to_table()
152150
assert result.schema.names == ["a_sum"]
@@ -245,30 +243,26 @@ def test_order_by():
245243
table_source = Declaration("table_source", TableSourceNodeOptions(table))
246244

247245
ord_opts = OrderByNodeOptions([("b", "ascending")])
248-
decl = Declaration.from_sequence(
249-
[table_source, Declaration("order_by", ord_opts)])
246+
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
250247
result = decl.to_table()
251248
expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
252249
assert result.equals(expected)
253250

254251
ord_opts = OrderByNodeOptions([(field("b"), "descending")])
255-
decl = Declaration.from_sequence(
256-
[table_source, Declaration("order_by", ord_opts)])
252+
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
257253
result = decl.to_table()
258254
expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]})
259255
assert result.equals(expected)
260256

261257
ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")
262-
decl = Declaration.from_sequence(
263-
[table_source, Declaration("order_by", ord_opts)])
258+
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
264259
result = decl.to_table()
265260
expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
266261
assert result.equals(expected)
267262

268263
# empty ordering
269264
ord_opts = OrderByNodeOptions([])
270-
decl = Declaration.from_sequence(
271-
[table_source, Declaration("order_by", ord_opts)])
265+
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
272266
with pytest.raises(
273267
ValueError, match="`ordering` must be an explicit non-empty ordering"
274268
):
@@ -283,11 +277,9 @@ def test_order_by():
283277

284278
def test_hash_join():
285279
left = pa.table({'key': [1, 2, 3], 'a': [4, 5, 6]})
286-
left_source = Declaration(
287-
"table_source", options=TableSourceNodeOptions(left))
280+
left_source = Declaration("table_source", options=TableSourceNodeOptions(left))
288281
right = pa.table({'key': [2, 3, 4], 'b': [4, 5, 6]})
289-
right_source = Declaration(
290-
"table_source", options=TableSourceNodeOptions(right))
282+
right_source = Declaration("table_source", options=TableSourceNodeOptions(right))
291283

292284
# inner join
293285
join_opts = HashJoinNodeOptions("inner", left_keys="key", right_keys="key")

0 commit comments

Comments
 (0)