From 09f9548076ce4f48c1edc90844456083c94fffe1 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 18:04:22 -0500 Subject: [PATCH 01/10] Make it easy to snoop log messages --- CONTRIBUTING.md | 2 ++ pyproject.toml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53ec9735..6ab8b45f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -118,6 +118,8 @@ access_token="dapi***" staging_ingestion_user="***@example.com" ``` +To see logging output from pytest while running tests, set `log_cli = "true"` under `tool.pytest.ini_options` in `pyproject.toml`. You can also set `log_cli_level` to any of the default Python log levels: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL` + There are several e2e test suites available: - `PySQLCoreTestSuite` - `PySQLLargeQueriesSuite` diff --git a/pyproject.toml b/pyproject.toml index f522d1ce..f651421e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,8 @@ exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck [tool.pytest.ini_options] minversion = "6.0" +log_cli = "false" +log_cli_level = "INFO" testpaths = [ "tests" ] From d232823a1d9181c555eb73c7e8f4abbcfc80e32a Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 18:07:03 -0500 Subject: [PATCH 02/10] Add a method for getting a human readable GUID for the session handle --- src/databricks/sql/client.py | 3 +++ src/databricks/sql/thrift_backend.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 722ed778..95de21b8 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -213,6 +213,9 @@ def __del__(self): def get_session_id(self): return self.thrift_backend.handle_to_id(self._session_handle) + + def get_session_id_hex(self): + return self.thrift_backend.handle_to_hex_id(self._session_handle) def cursor( self, diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index b4afeaa3..56f664cf 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -3,6 +3,7 @@ import logging import math import time +import uuid import threading import lz4.frame from ssl import CERT_NONE, CERT_REQUIRED, create_default_context @@ -1021,3 +1022,8 @@ def cancel_command(self, active_op_handle): @staticmethod def handle_to_id(session_handle): return session_handle.sessionId.guid + + @staticmethod + def handle_to_hex_id(session_handle: TCLIService.TSessionHandle): + this_uuid = uuid.UUID(bytes=session_handle.sessionId.guid) + return str(this_uuid) From 4141536de540e9893ae316013461d3ccb8dcc506 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 18:08:29 -0500 Subject: [PATCH 03/10] Don't raise an exception if connection was already closed --- src/databricks/sql/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 95de21b8..8f939b8f 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -247,7 +247,14 @@ def _close(self, close_cursors=True) -> None: if close_cursors: for cursor in self._cursors: cursor.close() - self.thrift_backend.close_session(self._session_handle) + try: + self.thrift_backend.close_session(self._session_handle) + except DatabaseError as e: + if "Invalid SessionHandle" in str(e): + pass + else: + raise e + self.open = False def commit(self): From 088091d339ff802fd2ec79c8fabe746fe3bb5b72 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 18:09:12 -0500 Subject: [PATCH 04/10] Log when we try to close a closed session. Test to make sure this works. --- src/databricks/sql/client.py | 4 +++- tests/e2e/test_driver.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 8f939b8f..36b6f886 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -190,7 +190,7 @@ def read(self) -> Optional[OAuthToken]: session_configuration, catalog, schema ) self.open = True - logger.info("Successfully opened session " + str(self.get_session_id())) + logger.info("Successfully opened session " + str(self.get_session_id_hex())) self._cursors = [] # type: List[Cursor] def __enter__(self): @@ -248,9 +248,11 @@ def _close(self, close_cursors=True) -> None: for cursor in self._cursors: cursor.close() try: + logger.info(f"Closing session {self.get_session_id_hex()}") self.thrift_backend.close_session(self._session_handle) except DatabaseError as e: if "Invalid SessionHandle" in str(e): + logger.warning(f"Attempted to close session that was already closed: {e}") pass else: raise e diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index 37428116..0947bf5e 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -616,6 +616,20 @@ def test_close_connection_closes_cursors(self): assert "RESOURCE_DOES_NOT_EXIST" in cm.exception.message + def test_closing_a_closed_connection_doesnt_fail(self): + + with self.assertLogs("databricks.sql", "WARNING") as cm: + with self.connection() as conn: + conn.close() + + expected_message_was_found = False + for log in cm.output: + if expected_message_was_found: + break + target = "Attempted to close session that was already closed" + expected_message_was_found = target in log + + self.assertTrue(expected_message_was_found, "Did not find expected log messages") # use a RetrySuite to encapsulate these tests which we'll typically want to run together; however keep From d110e9fff6a1400f245008c33cbce06af2709c88 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 18:15:07 -0500 Subject: [PATCH 05/10] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d3f8831..35a8e0aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 2.6.x (Unreleased) +- Fix: connector raised exception when calling close() on a closed Thrift session - Improve e2e test development ergonomics - Redact logged thrift responses by default - Add support for OAuth on Databricks Azure From e85ab0d8f744efd99aa65fd97f5eb478d17e875d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 18:18:48 -0500 Subject: [PATCH 06/10] Update comments for clarity --- tests/e2e/test_driver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index 0947bf5e..748aa949 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -619,7 +619,9 @@ def test_close_connection_closes_cursors(self): def test_closing_a_closed_connection_doesnt_fail(self): with self.assertLogs("databricks.sql", "WARNING") as cm: + # Second .close() call is when this context manager exits with self.connection() as conn: + # First .close() call is explicit here conn.close() expected_message_was_found = False From 49c52a53454a1c0c876a93622bb4b868d3645445 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 19:17:49 -0500 Subject: [PATCH 07/10] Update test.env.example for sqlalchemy tests --- test.env.example | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test.env.example b/test.env.example index 5ce7eca5..94aed419 100644 --- a/test.env.example +++ b/test.env.example @@ -4,4 +4,8 @@ http_path="" access_token="" # Only required to run the PySQLStagingIngestionTestSuite -staging_ingestion_user="" \ No newline at end of file +staging_ingestion_user="" + +# Only required to run SQLAlchemy tests +catalog="" +schema="" \ No newline at end of file From 8e2ff765a215eb953905ba726ec643e3ed4f33d2 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 23 Jun 2023 19:18:23 -0500 Subject: [PATCH 08/10] Black source files --- src/databricks/sql/client.py | 6 ++++-- src/databricks/sql/thrift_backend.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 36b6f886..4ab82f11 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -213,7 +213,7 @@ def __del__(self): def get_session_id(self): return self.thrift_backend.handle_to_id(self._session_handle) - + def get_session_id_hex(self): return self.thrift_backend.handle_to_hex_id(self._session_handle) @@ -252,7 +252,9 @@ def _close(self, close_cursors=True) -> None: self.thrift_backend.close_session(self._session_handle) except DatabaseError as e: if "Invalid SessionHandle" in str(e): - logger.warning(f"Attempted to close session that was already closed: {e}") + logger.warning( + f"Attempted to close session that was already closed: {e}" + ) pass else: raise e diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 56f664cf..7756c56a 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -1022,7 +1022,7 @@ def cancel_command(self, active_op_handle): @staticmethod def handle_to_id(session_handle): return session_handle.sessionId.guid - + @staticmethod def handle_to_hex_id(session_handle: TCLIService.TSessionHandle): this_uuid = uuid.UUID(bytes=session_handle.sessionId.guid) From bef151e6ef83ed505df8ee1265eccfc150fe177b Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 26 Jun 2023 12:43:53 -0500 Subject: [PATCH 09/10] PR: swallow and log all exceptions when closing connections Signed-off-by: Jesse Whitehouse --- src/databricks/sql/client.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 4ab82f11..5f6951f8 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -255,9 +255,12 @@ def _close(self, close_cursors=True) -> None: logger.warning( f"Attempted to close session that was already closed: {e}" ) - pass else: - raise e + logger.warning( + f"Attempt to close session raised an exception at the server: {e}" + ) + except Exception as e: + logger.error(f"Attempt to close session raised a local exception: {e}") self.open = False From 975fce6ac1edae213b6404a5f7ad5e18389f8d3c Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 26 Jun 2023 13:27:15 -0500 Subject: [PATCH 10/10] Fix: the server stopped raising an exception when we send multiple session close RPCs. Updated logging for clarity and modified e2e test Signed-off-by: Jesse Whitehouse --- src/databricks/sql/client.py | 6 +++++- tests/e2e/test_driver.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 5f6951f8..14e59df6 100644 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -247,8 +247,12 @@ def _close(self, close_cursors=True) -> None: if close_cursors: for cursor in self._cursors: cursor.close() + + logger.info(f"Closing session {self.get_session_id_hex()}") + if not self.open: + logger.debug("Session appears to have been closed already") + try: - logger.info(f"Closing session {self.get_session_id_hex()}") self.thrift_backend.close_session(self._session_handle) except DatabaseError as e: if "Invalid SessionHandle" in str(e): diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index 748aa949..26b7d186 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -618,7 +618,7 @@ def test_close_connection_closes_cursors(self): def test_closing_a_closed_connection_doesnt_fail(self): - with self.assertLogs("databricks.sql", "WARNING") as cm: + with self.assertLogs("databricks.sql", level="DEBUG",) as cm: # Second .close() call is when this context manager exits with self.connection() as conn: # First .close() call is explicit here @@ -628,7 +628,7 @@ def test_closing_a_closed_connection_doesnt_fail(self): for log in cm.output: if expected_message_was_found: break - target = "Attempted to close session that was already closed" + target = "Session appears to have been closed already" expected_message_was_found = target in log self.assertTrue(expected_message_was_found, "Did not find expected log messages")