Skip to content

Commit bd9aa2b

Browse files
committed
Added Benchmarking
1 parent 24730dd commit bd9aa2b

File tree

7 files changed

+186
-42
lines changed

7 files changed

+186
-42
lines changed

benchmarking/conftest.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import os
2+
import pytest
3+
4+
5+
@pytest.fixture(scope="session")
def benchmarking_host():
    """Return the BENCHMARKING_SERVER_HOSTNAME environment variable (None if unset)."""
    hostname = os.environ.get("BENCHMARKING_SERVER_HOSTNAME")
    return hostname
8+
9+
10+
@pytest.fixture(scope="session")
def benchmarking_http_path():
    """Return the BENCHMARKING_HTTP_PATH environment variable (None if unset)."""
    http_path = os.environ.get("BENCHMARKING_HTTP_PATH")
    return http_path
13+
14+
15+
@pytest.fixture(scope="session")
def benchmarking_access_token():
    """Return the BENCHMARKING_TOKEN environment variable (None if unset)."""
    token = os.environ.get("BENCHMARKING_TOKEN")
    return token
18+
19+
20+
@pytest.fixture(scope="session")
def benchfood_host():
    """Return the BENCHFOOD_SERVER_HOSTNAME environment variable (None if unset)."""
    hostname = os.environ.get("BENCHFOOD_SERVER_HOSTNAME")
    return hostname
23+
24+
25+
@pytest.fixture(scope="session")
def benchfood_http_path():
    """Return the BENCHFOOD_HTTP_PATH environment variable (None if unset)."""
    http_path = os.environ.get("BENCHFOOD_HTTP_PATH")
    return http_path
28+
29+
30+
@pytest.fixture(scope="session")
def benchfood_access_token():
    """Return the BENCHFOOD_TOKEN environment variable (None if unset)."""
    token = os.environ.get("BENCHFOOD_TOKEN")
    return token
33+
34+
35+
@pytest.fixture(scope="session", autouse=True)
def connection_details(
    benchmarking_host,
    benchmarking_http_path,
    benchmarking_access_token,
    benchfood_host,
    benchfood_http_path,
    benchfood_access_token,
):
    """Bundle the six connection fixtures into one dict.

    Each key is the name of the fixture that supplied its value.
    """
    details = dict(
        benchmarking_host=benchmarking_host,
        benchmarking_http_path=benchmarking_http_path,
        benchmarking_access_token=benchmarking_access_token,
        benchfood_host=benchfood_host,
        benchfood_http_path=benchfood_http_path,
        benchfood_access_token=benchfood_access_token,
    )
    return details

benchmarking/test_benchmark.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import random
2+
import time
3+
from databricks import sql
4+
import logging
5+
import pytest
6+
from contextlib import contextmanager
7+
from datetime import datetime
8+
log = logging.getLogger(__name__)
9+
10+
11+
class TestBenchmarkingSuite:
    """Time LIMIT/OFFSET queries and record the averaged duration.

    Every test issues ``ATTEMPTS`` timed queries over the benchmarking
    connection, averages the timings after discarding the fastest and the
    slowest sample, and inserts that figure into ``RESULTS_TABLE`` over the
    benchfood connection, labelled with ``TAG``.
    """

    # Label written with every result row. Alternative value kept for reference:
    # TAG = "PRE-SPLIT"
    TAG = "POST-SPLIT"
    CATALOG_NAME = "main"
    SCHEMA_NAME = "tpcds_sf100_delta"
    TABLE_NAME = "catalog_sales"
    RESULTS_TABLE = "main.pysql_benchmarking_schema.benchmarking_results"
    ATTEMPTS = 10
    ROWS = 1000000
    LARGE_QUERY_LIMIT = 1000000
    SMALL_QUERY_LIMIT = 10000

    @pytest.fixture(autouse=True)
    def get_details(self, connection_details):
        """Store the connection details and derive both ``sql.connect`` kwarg sets."""
        self.arguments = connection_details.copy()

        self.benchmarking_connection_params = {
            "server_hostname": self.arguments["benchmarking_host"],
            "http_path": self.arguments["benchmarking_http_path"],
            "access_token": self.arguments["benchmarking_access_token"],
        }

        self.benchfood_connection_params = {
            "server_hostname": self.arguments["benchfood_host"],
            "http_path": self.arguments["benchfood_http_path"],
            "access_token": self.arguments["benchfood_access_token"],
        }

    @contextmanager
    def connection(self, connection_params):
        """Yield a connection opened with ``connection_params``; close it on exit.

        Args:
            connection_params: keyword arguments forwarded to ``sql.connect``.
        """
        # The access token is a secret: mask it so it never reaches the logs.
        loggable_params = {
            key: ("***" if key == "access_token" else value)
            for key, value in connection_params.items()
        }
        log.info("Connecting with args: {}".format(loggable_params))
        conn = sql.connect(**connection_params)

        try:
            yield conn
        finally:
            conn.close()

    @contextmanager
    def cursor(self, connection_params):
        """Yield a cursor on a fresh connection; close cursor, then connection, on exit.

        Args:
            connection_params: keyword arguments forwarded to ``sql.connect``.
        """
        with self.connection(connection_params) as conn:
            cursor = conn.cursor()
            try:
                yield cursor
            finally:
                cursor.close()

    def removed_outlier_mean(self, data):
        """Return the mean of ``data`` without its smallest and largest value.

        The samples are sorted here, so callers need not pre-sort them. With
        only one or two samples there is nothing left after trimming, so the
        plain mean is returned instead of dividing by zero.

        Args:
            data: sequence of numeric samples.

        Returns:
            The trimmed mean as a float.

        Raises:
            ValueError: if ``data`` is empty.
        """
        samples = sorted(data)
        if not samples:
            raise ValueError("removed_outlier_mean() requires at least one sample")
        if len(samples) > 2:
            samples = samples[1:-1]

        return sum(samples) / len(samples)

    def insert_benchmarking_results_data(self, function_name, query_time):
        """Insert one result row into ``RESULTS_TABLE`` over the benchfood connection.

        Args:
            function_name: label of the benchmark that produced the timing.
            query_time: duration to store in the ``compute_duration`` column.
        """
        log.info(f"Inserting results {self.TAG} - {function_name}")
        # Values are interpolated directly: they are class constants and a
        # computed number, never external input.
        with self.cursor(self.benchfood_connection_params) as cursor:
            cursor.execute(
                f"INSERT INTO {self.RESULTS_TABLE} (tag, function_name, compute_duration, date_time) VALUES ('{self.TAG}', '{function_name}', {query_time}, '{datetime.now()}')"
            )

    def get_query_time(self, query, expected_num_rows):
        """Run ``query`` on the benchmarking connection and return the elapsed seconds.

        The measured span covers opening the connection, executing, fetching
        all rows, and closing the cursor and connection.

        Args:
            query: SQL text to execute.
            expected_num_rows: number of rows the query must return.

        Returns:
            Elapsed time in seconds as a float.

        Raises:
            AssertionError: if the fetched row count differs from
                ``expected_num_rows``.
        """
        # perf_counter is monotonic, so system clock adjustments cannot skew
        # (or make negative) the measured interval.
        start_time = time.perf_counter()
        with self.cursor(self.benchmarking_connection_params) as cursor:
            cursor.execute(query)
            result = cursor.fetchall()
            log.info("Fetched {} rows".format(len(result)))

            assert len(result) == expected_num_rows

        return time.perf_counter() - start_time

    def _run_query_benchmark(self, function_name, limit):
        """Time ``ATTEMPTS`` ``LIMIT limit`` queries and store their trimmed mean."""
        compute_duration = []

        for attempt in range(self.ATTEMPTS):
            log.info("Attempt: {}".format(attempt))
            # Each attempt reads from a different, randomly shifted window.
            offset = attempt * limit + random.randint(1, limit)

            query = "select * from {}.{}.{} LIMIT {} OFFSET {}".format(
                self.CATALOG_NAME, self.SCHEMA_NAME, self.TABLE_NAME, limit, offset
            )
            compute_duration.append(self.get_query_time(query, limit))

        self.insert_benchmarking_results_data(
            function_name, self.removed_outlier_mean(compute_duration)
        )

    def test_large_queries_performance(self):
        """Benchmark queries that fetch ``LARGE_QUERY_LIMIT`` rows each."""
        self._run_query_benchmark("large_query", self.LARGE_QUERY_LIMIT)

    def test_small_queries_performance(self):
        """Benchmark queries that fetch ``SMALL_QUERY_LIMIT`` rows each."""
        self._run_query_benchmark("small_query", self.SMALL_QUERY_LIMIT)
115+
116+

check.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,20 @@
2121
# Load environment variables from .env file
2222
# load_dotenv()
2323

24-
host = "e2-dogfood.staging.cloud.databricks.com"
25-
http_path = "/sql/1.0/warehouses/58aa1b363649e722"
24+
host = os.getenv("MY_SERVER_HOSTNAME")
25+
http_path = os.getenv("MY_HTTP_PATH")
26+
access_token = os.getenv("MY_TOKEN")
2627

27-
access_token = ""
2828
connection = sql.connect(
2929
server_hostname=host,
3030
http_path=http_path,
3131
access_token=access_token)
3232

3333

3434
cursor = connection.cursor()
35-
cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
35+
cursor.execute("select * from `auto_maintenance_bugbash`.`tpcds_sf1000_naga_testv32`.`store_sales` LIMIT 1000")
3636
# cursor.execute('SELECT 1')
37-
result = cursor.fetchall()
37+
result = cursor.fetchmany(10)
3838
for row in result:
3939
print(row)
4040

Binary file not shown.
Binary file not shown.

databricks_sql_connector/pyproject.toml

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,53 +10,37 @@ include = ["CHANGELOG.md"]
1010

1111
[tool.poetry.dependencies]
1212
python = "^3.8.0"
13-
#thrift = ">=0.16.0,<0.21.0"
14-
#pandas = [
15-
# { version = ">=1.2.5,<2.2.0", python = ">=3.8" }
16-
#]
17-
#pyarrow = ">=14.0.1,<17"
18-
19-
#lz4 = "^4.0.2"
20-
#requests = "^2.18.1"
21-
#oauthlib = "^3.1.0"
22-
#numpy = [
23-
# { version = "^1.16.6", python = ">=3.8,<3.11" },
24-
# { version = "^1.23.4", python = ">=3.11" },
25-
#]
13+
# TODO: still need to add databricks_sql_connector_core as a dependency
2614
databricks_sqlalchemy = { version = ">=1.0.0", optional = true }
27-
#openpyxl = "^3.0.10"
28-
#alembic = { version = "^1.0.11", optional = true }
29-
#urllib3 = ">=1.26"
30-
#
15+
3116
[tool.poetry.extras]
3217
databricks_sqlalchemy = ["databricks_sqlalchemy"]
33-
#alembic = ["sqlalchemy", "alembic"]
34-
#
35-
#[tool.poetry.dev-dependencies]
36-
#pytest = "^7.1.2"
37-
#mypy = "^1.10.1"
38-
#pylint = ">=2.12.0"
39-
#black = "^22.3.0"
40-
#pytest-dotenv = "^0.5.2"
4118

42-
#[tool.poetry.urls]
43-
#"Homepage" = "https://github.com/databricks/databricks-sql-python"
44-
#"Bug Tracker" = "https://github.com/databricks/databricks-sql-python/issues"
19+
[tool.poetry.dev-dependencies]
20+
pytest = "^7.1.2"
21+
mypy = "^1.10.1"
22+
pylint = ">=2.12.0"
23+
black = "^22.3.0"
24+
pytest-dotenv = "^0.5.2"
25+
26+
[tool.poetry.urls]
27+
"Homepage" = "https://github.com/databricks/databricks-sql-python"
28+
"Bug Tracker" = "https://github.com/databricks/databricks-sql-python/issues"
4529

4630
[tool.poetry.plugins."sqlalchemy.dialects"]
4731
"databricks" = "databricks_sqlalchemy:DatabricksDialect"
4832

4933
[build-system]
5034
requires = ["poetry-core>=1.0.0"]
5135
build-backend = "poetry.core.masonry.api"
52-
#
53-
#[tool.mypy]
54-
#ignore_missing_imports = "true"
55-
#exclude = ['ttypes\.py$', 'TCLIService\.py$']
56-
#
57-
#[tool.black]
58-
#exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
59-
#
36+
37+
[tool.mypy]
38+
ignore_missing_imports = "true"
39+
exclude = ['ttypes\.py$', 'TCLIService\.py$']
40+
41+
[tool.black]
42+
exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
43+
6044
[tool.pytest.ini_options]
6145
markers = {"reviewed" = "Test case has been reviewed by Databricks"}
6246
minversion = "6.0"

setup_script.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ def build_and_install_library(directory_name):
2828
if __name__ == "__main__":
2929
build_and_install_library("databricks_sql_connector_core")
3030
build_and_install_library("databricks_sql_connector")
31-
build_and_install_library("databricks_sqlalchemy")
31+
# build_and_install_library("databricks_sqlalchemy")

0 commit comments

Comments
 (0)