Skip to content

Commit 759401c

Browse files
authored
Cloud Fetch e2e tests (#154)
* Cloud Fetch e2e tests
  Signed-off-by: Matthew Kim <[email protected]>
* Test case works for e2-dogfood shared unity catalog
  Signed-off-by: Matthew Kim <[email protected]>
* Moving test to LargeQueriesSuite and setting catalog to hive_metastore
  Signed-off-by: Matthew Kim <[email protected]>
* Align default value of buffer_size_bytes in driver tests
  Signed-off-by: Matthew Kim <[email protected]>
* Adding comment to specify what's needed to run successfully
  Signed-off-by: Matthew Kim <[email protected]>
---------
Signed-off-by: Matthew Kim <[email protected]>
1 parent 5a34a4a commit 759401c

File tree

1 file changed

+33
-1
lines changed

1 file changed

+33
-1
lines changed

tests/e2e/test_driver.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import itertools
12
from contextlib import contextmanager
23
from collections import OrderedDict
34
import datetime
@@ -52,6 +53,7 @@ def __init__(self, method_name):
5253
# If running in local mode, just use environment variables for params.
5354
self.arguments = os.environ if get_args_from_env else {}
5455
self.arraysize = 1000
56+
self.buffer_size_bytes = 104857600
5557

5658
def connection_params(self, arguments):
5759
params = {
@@ -84,7 +86,7 @@ def connection(self, extra_params=()):
8486
@contextmanager
8587
def cursor(self, extra_params=()):
8688
with self.connection(extra_params) as conn:
87-
cursor = conn.cursor(arraysize=self.arraysize)
89+
cursor = conn.cursor(arraysize=self.arraysize, buffer_size_bytes=self.buffer_size_bytes)
8890
try:
8991
yield cursor
9092
finally:
@@ -104,6 +106,36 @@ def get_some_rows(self, cursor, fetchmany_size):
104106
else:
105107
return None
106108

109+
@skipUnless(pysql_supports_arrow(), 'needs arrow support')
def test_cloud_fetch(self):
    """Check that cloud fetch returns the same rows as the regular fetch path.

    For every combination of row limit, download-thread count and LZ4
    setting, the same query is executed twice -- once with
    ``use_cloud_fetch`` enabled and once with it disabled -- and the two
    result sets are compared row by row.

    This test can take several minutes to run.

    It requires a large table with many rows to properly initiate cloud
    fetch. e2-dogfood host > hive_metastore catalog > main schema has such
    a table called store_sales. If this table is deleted or this test is
    run on a different host, a different table may need to be used.
    """
    limits = [100000, 300000]
    threads = [10, 25]
    # Fetch in large batches so fetchall() does not dominate the runtime.
    self.arraysize = 100000
    base_query = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 "
    # NOTE(review): the query has a LIMIT but no ORDER BY, so the row-by-row
    # comparison below assumes both executions return rows in the same
    # order -- confirm this holds for the target warehouse.
    for num_limit, num_threads, lz4_compression in itertools.product(limits, threads, [True, False]):
        with self.subTest(num_limit=num_limit, num_threads=num_threads, lz4_compression=lz4_compression):
            query = base_query + "LIMIT " + str(num_limit)
            # Parameters shared by both runs. The LZ4 flag is forwarded so
            # the True/False subtests actually exercise different settings.
            # NOTE(review): "enable_query_result_lz4_compression" is assumed
            # to be the connector's keyword for this setting -- confirm
            # against the connector version under test.
            shared_params = {
                "catalog": "hive_metastore",
                "enable_query_result_lz4_compression": lz4_compression,
            }
            with self.cursor({
                **shared_params,
                "use_cloud_fetch": True,
                "max_download_threads": num_threads,
            }) as cursor:
                cursor.execute(query)
                cf_result = cursor.fetchall()
            # Baseline: cloud fetch is disabled explicitly so the comparison
            # does not depend on the connector's default for this flag.
            with self.cursor({
                **shared_params,
                "use_cloud_fetch": False,
            }) as cursor:
                cursor.execute(query)
                noop_result = cursor.fetchall()
            # unittest assertions are not stripped under ``python -O`` and
            # report both operands on failure.
            self.assertEqual(len(cf_result), len(noop_result))
            for row_index, (cf_row, noop_row) in enumerate(zip(cf_result, noop_result)):
                self.assertEqual(cf_row, noop_row, "row {} differs".format(row_index))
138+
107139

108140
# Exclude Retry tests because they require specific setups, and LargeQueries too slow for core
109141
# tests

0 commit comments

Comments
 (0)