1
1
from enum import Enum
2
2
from typing import Optional , Union , TYPE_CHECKING
3
+ from databricks .sql .exc import RequestError
3
4
from databricks .sql .results import ResultSet
4
5
6
+ from datetime import datetime
7
+
5
8
from dataclasses import dataclass
6
9
7
10
if TYPE_CHECKING :
@@ -19,11 +22,23 @@ class AsyncExecutionException(Exception):
19
22
pass
20
23
21
24
25
class AsyncExecutionUnrecoverableResultException(AsyncExecutionException):
    """Raised when a result can never be retrieved for this query id.

    Subclasses AsyncExecutionException, so existing handlers for the base
    exception keep catching it.
    """
29
+
30
+
22
31
@dataclass
class FakeCursor:
    """Minimal stand-in for a cursor.

    An instance (built as ``FakeCursor(None)``) is handed to
    ``ThriftBackend._handle_execute_response`` when fetching results, because
    that API expects a cursor argument.
    """

    # Operation handle the cursor is currently tracking; None when there is none.
    active_op_handle: Optional[ttypes.TOperationHandle]
25
34
26
35
36
@dataclass
class FakeExecuteStatementResponse:
    """Placeholder used when no real TExecuteStatementResp is available.

    AsyncExecution builds one of these (with ``directResults=False``) when it
    is constructed without an execute-statement response, so that result
    fetching can still be driven from the operation handle alone.
    """

    # Mirrors the field of the same name on the Thrift response.
    directResults: bool
    # Handle identifying the server-side operation.
    operationHandle: ttypes.TOperationHandle
40
+
41
+
27
42
class AsyncExecutionStatus (Enum ):
28
43
"""An enum that represents the status of an async execution"""
29
44
@@ -35,6 +50,7 @@ class AsyncExecutionStatus(Enum):
35
50
36
51
# todo: when is this ever evaluated?
37
52
ABORTED = 5
53
+ UNKNOWN = 6
38
54
39
55
40
56
def _toperationstate_to_ae_status (
@@ -54,52 +70,77 @@ def _toperationstate_to_ae_status(
54
70
55
71
class AsyncExecution :
56
72
"""
57
- A class that represents an async execution of a query.
58
-
59
- AsyncExecutions are effectively connectionless. But because thrift_backend is entangled
60
- with client.py, the AsyncExecution needs access to both a Connection and a ThriftBackend
61
-
62
- This will need to be refactored for cleanliness in the future.
73
+ A handle for a query execution on Databricks.
63
74
"""
64
75
65
76
_connection : "Connection"
66
77
_thrift_backend : "ThriftBackend"
67
78
_result_set : Optional ["ResultSet" ]
68
- _execute_statement_response : Optional [ttypes .TExecuteStatementResp ]
79
+ _execute_statement_response : Optional [
80
+ Union [FakeExecuteStatementResponse , ttypes .TExecuteStatementResp ]
81
+ ]
82
+ _last_sync_timestamp : Optional [datetime ] = None
83
+ _result_set : Optional ["ResultSet" ] = None
69
84
70
85
def __init__(
    self,
    thrift_backend: "ThriftBackend",
    connection: "Connection",
    query_id: UUID,
    query_secret: UUID,
    status: Optional[AsyncExecutionStatus] = AsyncExecutionStatus.UNKNOWN,
    execute_statement_response: Optional[
        Union[FakeExecuteStatementResponse, ttypes.TExecuteStatementResp]
    ] = None,
):
    """Create a handle for a query execution.

    Args:
        thrift_backend: Backend used for status polling, cancellation and
            result fetching.
        connection: Connection handed to the ResultSet built when results
            are fetched.
        query_id: Identifier of the query.
        query_secret: Secret paired with ``query_id``.
        status: Initial status; defaults to ``AsyncExecutionStatus.UNKNOWN``.
        execute_statement_response: The response returned when the statement
            was executed, if available. When omitted, a
            FakeExecuteStatementResponse is synthesised from this object's
            operation handle.
    """
    self._connection = connection
    self._thrift_backend = thrift_backend
    self.query_id = query_id
    self.query_secret = query_secret
    self.status = status

    # Compare against None explicitly: the argument is Optional, and the
    # decision must not depend on the truthiness of a response object.
    if execute_statement_response is not None:
        self._execute_statement_response = execute_statement_response
    else:
        # NOTE(review): t_operation_handle presumably derives from query_id and
        # query_secret, so this must stay after those attributes are assigned.
        self._execute_statement_response = FakeExecuteStatementResponse(
            directResults=False, operationHandle=self.t_operation_handle
        )
108
+
86
109
status : AsyncExecutionStatus
87
110
query_id : UUID
88
111
89
def get_result(self) -> "ResultSet":
    """Attempt to get the result of this query and set self.status to FETCHED.

    IMPORTANT: Generally, you'll call this method only after checking that the
    query is finished. But you can call it at any time. If you call this method
    while the query is still running, your code will block indefinitely until
    the query completes! This will be changed in a subsequent release
    (PECO-1291).

    If you have already called get_result successfully, this method returns the
    same ResultSet as before without making an additional roundtrip to the
    server.

    Raises:
        AsyncExecutionUnrecoverableResultException: if the query was canceled
            or aborted at the server, so a result will never be available.
    """
    # A canceled or aborted query will never produce a result.
    unrecoverable = (AsyncExecutionStatus.ABORTED, AsyncExecutionStatus.CANCELED)
    if self.status in unrecoverable:
        raise AsyncExecutionUnrecoverableResultException(
            "Result for %s is not recoverable. Query status is %s"
            % (self.query_id, self.status),
        )

    return self._get_result_set()
137
+
138
+ def _get_result_set (self ) -> "ResultSet" :
139
+ if self ._result_set is None :
140
+ self ._result_set = self ._thrift_fetch_result ()
141
+ self .status = AsyncExecutionStatus .FETCHED
142
+
143
+ return self ._result_set
103
144
104
145
def cancel (self ) -> None :
105
146
"""Cancel the query"""
@@ -111,42 +152,60 @@ def _thrift_cancel_operation(self) -> None:
111
152
_output = self ._thrift_backend .async_cancel_command (self .t_operation_handle )
112
153
self .status = AsyncExecutionStatus .CANCELED
113
154
114
- def poll_for_status (self ) -> None :
115
- """Check the thrift server for the status of this operation and set self.status
155
def _thrift_get_operation_status(self) -> ttypes.TGetOperationStatusResp:
    """Execute TGetOperationStatusReq for this execution's operation handle.

    Returns:
        The status response from the thrift backend.

    Raises:
        AsyncExecutionException: if the server reports
            RESOURCE_DOES_NOT_EXIST, i.e. the query_id:query_secret pair is
            not found on the server.
        RequestError: any other request failure is propagated unchanged.
    """
    try:
        return self._thrift_backend._poll_for_status(self.t_operation_handle)
    except RequestError as e:
        if "RESOURCE_DOES_NOT_EXIST" in e.message:
            raise AsyncExecutionException(
                "Query not found: %s" % self.query_id
            ) from e
        # Any other failure must surface to the caller. Falling through here
        # would return None and break callers that read fields off the response.
        raise
116
167
117
- This will result in an error if the operation has been canceled or aborted at the server"""
168
def serialize(self) -> str:
    """Return a string representing the query_id and secret of this AsyncExecution.

    The format is ``<query_id>:<query_secret>`` with no surrounding or
    embedded whitespace. Use this to preserve a reference to the query_id and
    query_secret.
    """
    return f"{self.query_id}:{self.query_secret}"
121
173
122
- def _thrift_fetch_result (self ) -> None :
123
- """Execute TFetchResultReq and store the result """
174
def sync_status(self) -> None:
    """Synchronise the status of this AsyncExecution with the server query execution state.

    Polls the server for the operation status, maps the returned operation
    state onto an AsyncExecutionStatus and stores it in ``self.status``, then
    records when the sync happened.
    """
    resp = self._thrift_get_operation_status()
    self.status = _toperationstate_to_ae_status(resp.operationState)
    # Naive timestamp from datetime.now() (no tzinfo is attached).
    self._last_sync_timestamp = datetime.now()
128
180
129
- # in the case of direct results this creates a second cursor...how can I avoid that?
181
def _thrift_fetch_result(self) -> "ResultSet":
    """Execute TFetchResultReq and return the resulting ResultSet.

    Nothing is cached and ``self.status`` is not modified here; the caller
    handles both.
    """
    # A cursor is required to hook into the thrift_backend result fetching
    # API, so a placeholder cursor with no active operation handle is supplied.
    er = self._thrift_backend._handle_execute_response(
        self._execute_statement_response, FakeCursor(None)
    )

    # NOTE(review): the ResultSet is given the connection's thrift_backend, not
    # self._thrift_backend. Confirm the two are always the same object.
    return ResultSet(
        connection=self._connection,
        execute_response=er,
        thrift_backend=self._connection.thrift_backend,
    )
140
193
141
- self .status = AsyncExecutionStatus .FETCHED
142
-
143
194
@property
def is_running(self) -> bool:
    """True when the locally stored status is RUNNING or PENDING.

    Reads ``self.status`` only; it does not contact the server.
    """
    return self.status in (
        AsyncExecutionStatus.RUNNING,
        AsyncExecutionStatus.PENDING,
    )
149
200
201
@property
def is_canceled(self) -> bool:
    """True when the locally stored status is CANCELED (no server call is made)."""
    return self.status == AsyncExecutionStatus.CANCELED
204
+
205
@property
def is_finished(self) -> bool:
    """True when the locally stored status is FINISHED (no server call is made)."""
    return self.status == AsyncExecutionStatus.FINISHED
208
+
150
209
@property
151
210
def t_operation_handle (self ) -> ttypes .TOperationHandle :
152
211
"""Return the current AsyncExecution as a Thrift TOperationHandle"""
@@ -161,6 +220,11 @@ def t_operation_handle(self) -> ttypes.TOperationHandle:
161
220
162
221
return handle
163
222
223
@property
def last_sync_timestamp(self) -> Optional[datetime]:
    """The timestamp of the last time self.status was synced with the server.

    Returns None when no sync has been recorded yet.
    """
    return self._last_sync_timestamp
227
+
164
228
@classmethod
165
229
def from_thrift_response (
166
230
cls ,
@@ -180,3 +244,28 @@ def from_thrift_response(
180
244
),
181
245
execute_statement_response = resp ,
182
246
)
247
+
248
@classmethod
def from_query_id_and_secret(
    cls,
    connection: "Connection",
    thrift_backend: "ThriftBackend",
    query_id: UUID,
    query_secret: UUID,
) -> "AsyncExecution":
    """Return a valid AsyncExecution object from a query_id and query_secret.

    The new object is created without an execute-statement response and with
    the default status, then immediately synchronised with the server so that
    an unknown id/secret pair is detected up front.

    Raises:
        AsyncExecutionException: if the query_id:query_secret pair is not
            found on the server.
    """
    # Rebuild a handle for the existing execution from its identifiers.
    ae = cls(
        connection=connection,
        thrift_backend=thrift_backend,
        query_id=query_id,
        query_secret=query_secret,
    )
    # Contact the server once: validates the pair and populates ae.status.
    ae.sync_status()

    return ae
0 commit comments