diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py
index 9beab037..7bb0ae3e 100644
--- a/src/databricks/sql/thrift_backend.py
+++ b/src/databricks/sql/thrift_backend.py
@@ -898,8 +898,12 @@ def execute_command(
             sessionHandle=session_handle,
             statement=operation,
             runAsync=True,
-            getDirectResults=ttypes.TSparkGetDirectResults(
-                maxRows=max_rows, maxBytes=max_bytes
+            # For async operation we don't want the direct results
+            getDirectResults=None
+            if async_op
+            else ttypes.TSparkGetDirectResults(
+                maxRows=max_rows,
+                maxBytes=max_bytes,
             ),
             canReadArrowResult=True if pyarrow else False,
             canDecompressLZ4Result=lz4_compression,
diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py
index 41ef029b..ed8ac457 100644
--- a/tests/e2e/common/large_queries_mixin.py
+++ b/tests/e2e/common/large_queries_mixin.py
@@ -94,7 +94,7 @@ def test_long_running_query(self):
         scale_factor = 1
         with self.cursor() as cursor:
             while duration < min_duration:
-                assert scale_factor < 512, "Detected infinite loop"
+                assert scale_factor < 1024, "Detected infinite loop"
                 start = time.time()
 
                 cursor.execute(
diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py
index 2f0881cd..45fea480 100644
--- a/tests/e2e/test_driver.py
+++ b/tests/e2e/test_driver.py
@@ -177,19 +177,22 @@ def test_cloud_fetch(self):
         for i in range(len(cf_result)):
             assert cf_result[i] == noop_result[i]
 
-    def test_execute_async(self):
-        def isExecuting(operation_state):
-            return not operation_state or operation_state in [
-                ttypes.TOperationState.RUNNING_STATE,
-                ttypes.TOperationState.PENDING_STATE,
-            ]
+
+class TestPySQLAsyncQueriesSuite(PySQLPytestTestCase):
+    def isExecuting(self, operation_state):
+        return not operation_state or operation_state in [
+            ttypes.TOperationState.RUNNING_STATE,
+            ttypes.TOperationState.PENDING_STATE,
+        ]
+
+    def test_execute_async__long_running(self):
 
         long_running_query = "SELECT COUNT(*) FROM RANGE(10000 * 16) x JOIN RANGE(10000) y ON FROM_UNIXTIME(x.id * y.id, 'yyyy-MM-dd') LIKE '%not%a%date%'"
 
         with self.cursor() as cursor:
             cursor.execute_async(long_running_query)
 
             ## Polling after every POLLING_INTERVAL seconds
-            while isExecuting(cursor.get_query_state()):
+            while self.isExecuting(cursor.get_query_state()):
                 time.sleep(self.POLLING_INTERVAL)
                 log.info("Polling the status in test_execute_async")
@@ -198,6 +201,55 @@ def isExecuting(operation_state):
 
         assert result[0].asDict() == {"count(1)": 0}
 
+    def test_execute_async__small_result(self):
+        small_result_query = "SELECT 1"
+
+        with self.cursor() as cursor:
+            cursor.execute_async(small_result_query)
+
+            ## Fake sleep for 5 secs
+            time.sleep(5)
+
+            ## Polling after every POLLING_INTERVAL seconds
+            while self.isExecuting(cursor.get_query_state()):
+                time.sleep(self.POLLING_INTERVAL)
+                log.info("Polling the status in test_execute_async")
+
+            cursor.get_async_execution_result()
+            result = cursor.fetchall()
+
+        assert result[0].asDict() == {"1": 1}
+
+    def test_execute_async__large_result(self):
+        x_dimension = 1000
+        y_dimension = 1000
+        large_result_query = f"""
+                SELECT
+                    x.id AS x_id,
+                    y.id AS y_id,
+                    FROM_UNIXTIME(x.id * y.id, 'yyyy-MM-dd') AS date
+                FROM
+                    RANGE({x_dimension}) x
+                JOIN
+                    RANGE({y_dimension}) y
+            """
+
+        with self.cursor() as cursor:
+            cursor.execute_async(large_result_query)
+
+            ## Fake sleep for 5 secs
+            time.sleep(5)
+
+            ## Polling after every POLLING_INTERVAL seconds
+            while self.isExecuting(cursor.get_query_state()):
+                time.sleep(self.POLLING_INTERVAL)
+                log.info("Polling the status in test_execute_async")
+
+            cursor.get_async_execution_result()
+            result = cursor.fetchall()
+
+        assert len(result) == x_dimension * y_dimension
+
 
 # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core
 # tests