Skip to content

Added Benchmarking #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: jprakash-db/PECO-1803
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions benchmarking/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import pytest


@pytest.fixture(scope="session")
def benchmarking_host():
    """Hostname of the benchmarking warehouse, read from the environment."""
    return os.environ.get("BENCHMARKING_SERVER_HOSTNAME")


@pytest.fixture(scope="session")
def benchmarking_http_path():
    """HTTP path of the benchmarking warehouse, read from the environment."""
    return os.environ.get("BENCHMARKING_HTTP_PATH")


@pytest.fixture(scope="session")
def benchmarking_access_token():
    """Access token for the benchmarking warehouse, read from the environment."""
    return os.environ.get("BENCHMARKING_TOKEN")


@pytest.fixture(scope="session")
def benchfood_host():
    """Hostname of the benchfood (results) warehouse, read from the environment."""
    return os.environ.get("BENCHFOOD_SERVER_HOSTNAME")


@pytest.fixture(scope="session")
def benchfood_http_path():
    """HTTP path of the benchfood (results) warehouse, read from the environment."""
    return os.environ.get("BENCHFOOD_HTTP_PATH")


@pytest.fixture(scope="session")
def benchfood_access_token():
    """Access token for the benchfood (results) warehouse, read from the environment."""
    return os.environ.get("BENCHFOOD_TOKEN")


@pytest.fixture(scope="session", autouse=True)
def connection_details(benchmarking_host, benchmarking_http_path, benchmarking_access_token, benchfood_host, benchfood_http_path, benchfood_access_token):
    """Bundle every per-warehouse setting into a single session-wide dict.

    Keys mirror the individual fixture names so tests can look up values
    without depending on each fixture separately.
    """
    return dict(
        benchmarking_host=benchmarking_host,
        benchmarking_http_path=benchmarking_http_path,
        benchmarking_access_token=benchmarking_access_token,
        benchfood_host=benchfood_host,
        benchfood_http_path=benchfood_http_path,
        benchfood_access_token=benchfood_access_token,
    )
116 changes: 116 additions & 0 deletions benchmarking/test_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import random
import time
from databricks import sql
import logging
import pytest
from contextlib import contextmanager
from datetime import datetime
log = logging.getLogger(__name__)


class TestBenchmarkingSuite:
    """Benchmark SELECT latency against a warehouse and persist trimmed means.

    Each scenario runs ``ATTEMPTS`` queries, sorts the durations, drops the
    fastest and slowest run, and inserts the mean of the remainder into
    ``RESULTS_TABLE`` on the benchfood warehouse.

    NOTE(review): relies on the ``connection_details`` session fixture from
    conftest.py — the BENCHMARKING_*/BENCHFOOD_* env vars must be set.
    """

    # TAG = "PRE-SPLIT"
    TAG = "POST-SPLIT"  # label written alongside every result row
    CATALOG_NAME = "main"
    SCHEMA_NAME = "tpcds_sf100_delta"
    TABLE_NAME = "catalog_sales"
    RESULTS_TABLE = "main.pysql_benchmarking_schema.benchmarking_results"
    ATTEMPTS = 10  # repetitions per scenario; min and max runs are discarded
    ROWS = 1000000
    LARGE_QUERY_LIMIT = 1000000  # rows fetched per "large" query
    SMALL_QUERY_LIMIT = 10000  # rows fetched per "small" query

    @pytest.fixture(autouse=True)
    def get_details(self, connection_details):
        """Split the session connection dict into per-warehouse connect kwargs."""
        self.arguments = connection_details.copy()

        # Warehouse the benchmark queries run against.
        self.benchmarking_connection_params = {
            "server_hostname": self.arguments["benchmarking_host"],
            "http_path": self.arguments["benchmarking_http_path"],
            "access_token": self.arguments["benchmarking_access_token"],
        }

        # Warehouse that stores the benchmark results.
        self.benchfood_connection_params = {
            "server_hostname": self.arguments["benchfood_host"],
            "http_path": self.arguments["benchfood_http_path"],
            "access_token": self.arguments["benchfood_access_token"],
        }

    @contextmanager
    def connection(self, connection_params):
        """Yield an open connection, guaranteeing close() on exit."""
        log.info("Connecting with args: {}".format(connection_params))
        conn = sql.connect(**connection_params)

        try:
            yield conn
        finally:
            conn.close()

    @contextmanager
    def cursor(self, connection_params):
        """Yield a cursor on a fresh connection; cursor then connection are closed."""
        with self.connection(connection_params) as conn:
            cursor = conn.cursor()
            try:
                yield cursor
            finally:
                cursor.close()

    def removed_outlier_mean(self, data):
        """Return the mean of *data* with its first and last elements dropped.

        Callers pass a sorted list, so this discards the single smallest and
        single largest measurement (a trimmed mean).

        Raises:
            ValueError: if fewer than three samples are given — the previous
                implementation divided by zero (or a negative count) here.
        """
        if len(data) < 3:
            raise ValueError("need at least 3 samples to drop min/max outliers")
        trimmed = data[1:-1]
        return sum(trimmed) / len(trimmed)

    def insert_benchmarking_results_data(self, function_name, query_time):
        """Persist one (tag, scenario, duration, timestamp) row to RESULTS_TABLE."""
        log.info(f"Inserting results {self.TAG} - {function_name}")
        with self.cursor(self.benchfood_connection_params) as cursor:
            # Named parameter markers (the same `:param` style used elsewhere
            # in this repo) let the driver escape the values; only the table
            # name, a class constant, is interpolated.
            cursor.execute(
                f"INSERT INTO {self.RESULTS_TABLE} "
                "(tag, function_name, compute_duration, date_time) "
                "VALUES (:tag, :function_name, :compute_duration, :date_time)",
                {
                    "tag": self.TAG,
                    "function_name": function_name,
                    "compute_duration": query_time,
                    "date_time": str(datetime.now()),
                },
            )

    def get_query_time(self, query, expected_num_rows):
        """Run *query*, fetch all rows, and return elapsed wall time in seconds.

        Asserts exactly *expected_num_rows* rows came back so a truncated
        result set cannot masquerade as a fast run.
        """
        # perf_counter is monotonic, unlike time.time(), so the measurement
        # cannot be skewed by system clock adjustments mid-run.
        start_time = time.perf_counter()
        with self.cursor(self.benchmarking_connection_params) as cursor:
            cursor.execute(query)
            result = cursor.fetchall()
            log.info("Fetched {} rows".format(len(result)))

        assert len(result) == expected_num_rows

        return time.perf_counter() - start_time

    def _run_query_benchmark(self, function_name, limit):
        """Shared driver: time ATTEMPTS queries of *limit* rows, record the mean.

        Each attempt reads a random offset inside its own window so repeated
        runs do not hit an identical (possibly cached) result set.
        """
        compute_duration = []

        for i in range(self.ATTEMPTS):
            log.info("Attempt: {}".format(i))
            offset = i * limit + random.randint(1, limit)

            query = "select * from {}.{}.{} LIMIT {} OFFSET {}".format(
                self.CATALOG_NAME, self.SCHEMA_NAME, self.TABLE_NAME, limit, offset
            )
            compute_duration.append(self.get_query_time(query, limit))

        compute_duration.sort()
        self.insert_benchmarking_results_data(
            function_name, self.removed_outlier_mean(compute_duration)
        )

    def test_large_queries_performance(self):
        """Benchmark LARGE_QUERY_LIMIT-row queries."""
        self._run_query_benchmark("large_query", self.LARGE_QUERY_LIMIT)

    def test_small_queries_performance(self):
        """Benchmark SMALL_QUERY_LIMIT-row queries."""
        self._run_query_benchmark("small_query", self.SMALL_QUERY_LIMIT)


10 changes: 5 additions & 5 deletions check.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,20 @@
# Load environment variables from .env file
# load_dotenv()

host = "e2-dogfood.staging.cloud.databricks.com"
http_path = "/sql/1.0/warehouses/58aa1b363649e722"
host = os.getenv("MY_SERVER_HOSTNAME")
http_path = os.getenv("MY_HTTP_PATH")
access_token = os.getenv("MY_TOKEN")

access_token = ""
connection = sql.connect(
server_hostname=host,
http_path=http_path,
access_token=access_token)


cursor = connection.cursor()
cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
cursor.execute("select * from `auto_maintenance_bugbash`.`tpcds_sf1000_naga_testv32`.`store_sales` LIMIT 1000")
# cursor.execute('SELECT 1')
result = cursor.fetchall()
result = cursor.fetchmany(10)
for row in result:
print(row)

Expand Down
Binary file not shown.
Binary file not shown.
56 changes: 20 additions & 36 deletions databricks_sql_connector/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,53 +10,37 @@ include = ["CHANGELOG.md"]

[tool.poetry.dependencies]
python = "^3.8.0"
#thrift = ">=0.16.0,<0.21.0"
#pandas = [
# { version = ">=1.2.5,<2.2.0", python = ">=3.8" }
#]
#pyarrow = ">=14.0.1,<17"

#lz4 = "^4.0.2"
#requests = "^2.18.1"
#oauthlib = "^3.1.0"
#numpy = [
# { version = "^1.16.6", python = ">=3.8,<3.11" },
# { version = "^1.23.4", python = ">=3.11" },
#]
# Remaining: add databricks_sql_connector_core
databricks_sqlalchemy = { version = ">=1.0.0", optional = true }
#openpyxl = "^3.0.10"
#alembic = { version = "^1.0.11", optional = true }
#urllib3 = ">=1.26"
#

[tool.poetry.extras]
databricks_sqlalchemy = ["databricks_sqlalchemy"]
#alembic = ["sqlalchemy", "alembic"]
#
#[tool.poetry.dev-dependencies]
#pytest = "^7.1.2"
#mypy = "^1.10.1"
#pylint = ">=2.12.0"
#black = "^22.3.0"
#pytest-dotenv = "^0.5.2"

#[tool.poetry.urls]
#"Homepage" = "https://github.com/databricks/databricks-sql-python"
#"Bug Tracker" = "https://github.com/databricks/databricks-sql-python/issues"
[tool.poetry.dev-dependencies]
pytest = "^7.1.2"
mypy = "^1.10.1"
pylint = ">=2.12.0"
black = "^22.3.0"
pytest-dotenv = "^0.5.2"

[tool.poetry.urls]
"Homepage" = "https://github.com/databricks/databricks-sql-python"
"Bug Tracker" = "https://github.com/databricks/databricks-sql-python/issues"

[tool.poetry.plugins."sqlalchemy.dialects"]
"databricks" = "databricks_sqlalchemy:DatabricksDialect"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
#
#[tool.mypy]
#ignore_missing_imports = "true"
#exclude = ['ttypes\.py$', 'TCLIService\.py$']
#
#[tool.black]
#exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
#

[tool.mypy]
ignore_missing_imports = "true"
exclude = ['ttypes\.py$', 'TCLIService\.py$']

[tool.black]
exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'

[tool.pytest.ini_options]
markers = {"reviewed" = "Test case has been reviewed by Databricks"}
minversion = "6.0"
Expand Down
2 changes: 1 addition & 1 deletion setup_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ def build_and_install_library(directory_name):
if __name__ == "__main__":
build_and_install_library("databricks_sql_connector_core")
build_and_install_library("databricks_sql_connector")
build_and_install_library("databricks_sqlalchemy")
# build_and_install_library("databricks_sqlalchemy")