Skip to content

Commit f41a996

Browse files
cbornetJesse Whitehouse
and
Jesse Whitehouse
authored
Add support for table comments (#308)
* Add support for table comments Signed-off-by: Christophe Bornet <[email protected]> * Use per-table DTE to get the table comments Signed-off-by: Christophe Bornet <[email protected]> * Revert pytest.ini change Signed-off-by: Jesse Whitehouse <[email protected]> * Fix typo in test name for columns. Move .engine and .compile into a base class. Scaffold in the Table Comment unit tests. Signed-off-by: Jesse Whitehouse <[email protected]> * Add unit tests for table comments Signed-off-by: Jesse Whitehouse <[email protected]> * Revert overrides since these aren't needed after #328 Signed-off-by: Jesse Whitehouse <[email protected]> * Stop skipping table comment portions of ComponentReflectionTest Signed-off-by: Jesse Whitehouse <[email protected]> * Move DTE parsing into _parse.py Signed-off-by: Jesse Whitehouse <[email protected]> * Add e2e test using inspector Signed-off-by: Jesse Whitehouse <[email protected]> * Add unit test for new method in _parse.py Signed-off-by: Jesse Whitehouse <[email protected]> * Fix assertion in column comment test This was missed in #328 Signed-off-by: Jesse Whitehouse <[email protected]> --------- Signed-off-by: Christophe Bornet <[email protected]> Signed-off-by: Jesse Whitehouse <[email protected]> Co-authored-by: Jesse Whitehouse <[email protected]>
1 parent a7f4773 commit f41a996

File tree

8 files changed

+174
-61
lines changed

8 files changed

+174
-61
lines changed

src/databricks/sqlalchemy/_ddl.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,13 @@ def __init__(self, dialect):
1616

1717
class DatabricksDDLCompiler(compiler.DDLCompiler):
1818
def post_create_table(self, table):
19-
return " USING DELTA"
19+
post = " USING DELTA"
20+
if table.comment:
21+
comment = self.sql_compiler.render_literal_value(
22+
table.comment, sqltypes.String()
23+
)
24+
post += " COMMENT " + comment
25+
return post
2026

2127
def visit_unique_constraint(self, constraint, **kw):
2228
logger.warning("Databricks does not support unique constraints")
@@ -61,7 +67,7 @@ def get_column_specification(self, column, **kwargs):
6167
feature in the future, similar to the Microsoft SQL Server dialect.
6268
"""
6369
if column is column.table._autoincrement_column or column.autoincrement is True:
64-
logger.warn(
70+
logger.warning(
6571
"Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead."
6672
)
6773

src/databricks/sqlalchemy/_parse.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,20 @@ def match_dte_rows_by_value(dte_output: List[Dict[str, str]], match: str) -> Lis
251251
return output_rows
252252

253253

254+
def match_dte_rows_by_key(dte_output: List[Dict[str, str]], match: str) -> List[dict]:
255+
"""Return a list of dictionaries containing only the col_name:data_type pairs where the `col_name`
256+
value contains the match argument.
257+
"""
258+
259+
output_rows = []
260+
261+
for row_dict in dte_output:
262+
if match in row_dict["col_name"]:
263+
output_rows.append(row_dict)
264+
265+
return output_rows
266+
267+
254268
def get_fk_strings_from_dte_output(dte_output: List[Dict[str, str]]) -> List[dict]:
255269
"""If the DESCRIBE TABLE EXTENDED output contains foreign key constraints, return a list of dictionaries,
256270
one dictionary per defined constraint
@@ -275,6 +289,15 @@ def get_pk_strings_from_dte_output(
275289
return output
276290

277291

292+
def get_comment_from_dte_output(dte_output: List[Dict[str, str]]) -> Optional[str]:
293+
"""Returns the value of the first "Comment" col_name data in dte_output"""
294+
output = match_dte_rows_by_key(dte_output, "Comment")
295+
if not output:
296+
return None
297+
else:
298+
return output[0]["data_type"]
299+
300+
278301
# The keys of this dictionary are the values we expect to see in a
279302
# TGetColumnsRequest's .TYPE_NAME attribute.
280303
# These are enumerated in ttypes.py as class TTypeId.

src/databricks/sqlalchemy/base.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import re
2-
from typing import Any, List, Optional, Dict, Union, Collection, Iterable, Tuple
1+
from typing import Any, List, Optional, Dict, Union
32

43
import databricks.sqlalchemy._ddl as dialect_ddl_impl
54
import databricks.sqlalchemy._types as dialect_type_impl
@@ -11,19 +10,20 @@
1110
build_pk_dict,
1211
get_fk_strings_from_dte_output,
1312
get_pk_strings_from_dte_output,
13+
get_comment_from_dte_output,
1414
parse_column_info_from_tgetcolumnsresponse,
1515
)
1616

1717
import sqlalchemy
1818
from sqlalchemy import DDL, event
1919
from sqlalchemy.engine import Connection, Engine, default, reflection
20-
from sqlalchemy.engine.reflection import ObjectKind
2120
from sqlalchemy.engine.interfaces import (
2221
ReflectedForeignKeyConstraint,
2322
ReflectedPrimaryKeyConstraint,
2423
ReflectedColumn,
25-
TableKey,
24+
ReflectedTableComment,
2625
)
26+
from sqlalchemy.engine.reflection import ReflectionDefaults
2727
from sqlalchemy.exc import DatabaseError, SQLAlchemyError
2828

2929
try:
@@ -285,7 +285,7 @@ def get_table_names(self, connection: Connection, schema=None, **kwargs):
285285
views_result = self.get_view_names(connection=connection, schema=schema)
286286

287287
# In Databricks, SHOW TABLES FROM <schema> returns both tables and views.
288-
# Potential optimisation: rewrite this to instead query informtation_schema
288+
# Potential optimisation: rewrite this to instead query information_schema
289289
tables_minus_views = [
290290
row.tableName for row in tables_result if row.tableName not in views_result
291291
]
@@ -328,7 +328,7 @@ def get_materialized_view_names(
328328
def get_temp_view_names(
329329
self, connection: Connection, schema: Optional[str] = None, **kw: Any
330330
) -> List[str]:
331-
"""A wrapper around get_view_names taht fetches only the names of temporary views"""
331+
"""A wrapper around get_view_names that fetches only the names of temporary views"""
332332
return self.get_view_names(connection, schema, only_temp=True)
333333

334334
def do_rollback(self, dbapi_connection):
@@ -375,6 +375,30 @@ def get_schema_names(self, connection, **kw):
375375
schema_list = [row[0] for row in result]
376376
return schema_list
377377

378+
@reflection.cache
379+
def get_table_comment(
380+
self,
381+
connection: Connection,
382+
table_name: str,
383+
schema: Optional[str] = None,
384+
**kw: Any,
385+
) -> ReflectedTableComment:
386+
result = self._describe_table_extended(
387+
connection=connection,
388+
table_name=table_name,
389+
schema_name=schema,
390+
)
391+
392+
if result is None:
393+
return ReflectionDefaults.table_comment()
394+
395+
comment = get_comment_from_dte_output(result)
396+
397+
if comment:
398+
return dict(text=comment)
399+
else:
400+
return ReflectionDefaults.table_comment()
401+
378402

379403
@event.listens_for(Engine, "do_connect")
380404
def receive_do_connect(dialect, conn_rec, cargs, cparams):

src/databricks/sqlalchemy/requirements.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,18 @@ def table_reflection(self):
159159
"""target database has general support for table reflection"""
160160
return sqlalchemy.testing.exclusions.open()
161161

162+
@property
163+
def comment_reflection(self):
164+
"""Indicates if the database support table comment reflection"""
165+
return sqlalchemy.testing.exclusions.open()
166+
167+
@property
168+
def comment_reflection_full_unicode(self):
169+
"""Indicates if the database support table comment reflection in the
170+
full unicode range, including emoji etc.
171+
"""
172+
return sqlalchemy.testing.exclusions.open()
173+
162174
@property
163175
def temp_table_reflection(self):
164176
"""ComponentReflection test is intricate and simply cannot function without this exclusion being defined here.

src/databricks/sqlalchemy/test/_future.py

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313
ComponentReflectionTest,
1414
ComponentReflectionTestExtra,
1515
CTETest,
16-
FutureTableDDLTest,
1716
InsertBehaviorTest,
18-
TableDDLTest,
1917
)
2018
from sqlalchemy.testing.suite import (
2119
ArrayTest,
@@ -53,7 +51,6 @@ class FutureFeature(Enum):
5351
PROVISION = "event-driven engine configuration"
5452
REGEXP = "_visit_regexp"
5553
SANE_ROWCOUNT = "sane_rowcount support"
56-
TBL_COMMENTS = "table comment reflection"
5754
TBL_OPTS = "get_table_options method"
5855
TEST_DESIGN = "required test-fixture overrides"
5956
TUPLE_LITERAL = "tuple-like IN markers completely"
@@ -251,36 +248,7 @@ class FutureWeCanSetDefaultSchemaWEventsTest(FutureWeCanSetDefaultSchemaWEventsT
251248
pass
252249

253250

254-
class FutureTableDDLTest(FutureTableDDLTest):
255-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
256-
def test_add_table_comment(self):
257-
"""We could use requirements.comment_reflection here to disable this but prefer a more meaningful skip message"""
258-
pass
259-
260-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
261-
def test_drop_table_comment(self):
262-
"""We could use requirements.comment_reflection here to disable this but prefer a more meaningful skip message"""
263-
pass
264-
265-
266-
class TableDDLTest(TableDDLTest):
267-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
268-
def test_add_table_comment(self, connection):
269-
"""We could use requirements.comment_reflection here to disable this but prefer a more meaningful skip message"""
270-
pass
271-
272-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
273-
def test_drop_table_comment(self, connection):
274-
"""We could use requirements.comment_reflection here to disable this but prefer a more meaningful skip message"""
275-
pass
276-
277-
278251
class ComponentReflectionTest(ComponentReflectionTest):
279-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
280-
def test_get_multi_table_comment(self):
281-
"""There are 84 permutations of this test that are skipped."""
282-
pass
283-
284252
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_OPTS, True))
285253
def test_multi_get_table_options_tables(self):
286254
"""It's not clear what the expected ouput from this method would even _be_. Requires research."""
@@ -302,22 +270,6 @@ def test_get_multi_pk_constraint(self):
302270
def test_get_multi_check_constraints(self):
303271
pass
304272

305-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
306-
def test_get_comments(self):
307-
pass
308-
309-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
310-
def test_get_comments_with_schema(self):
311-
pass
312-
313-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
314-
def test_comments_unicode(self):
315-
pass
316-
317-
@pytest.mark.skip(reason=render_future_feature(FutureFeature.TBL_COMMENTS))
318-
def test_comments_unicode_full(self):
319-
pass
320-
321273

322274
class ComponentReflectionTestExtra(ComponentReflectionTestExtra):
323275
@pytest.mark.skip(render_future_feature(FutureFeature.CHECK))

src/databricks/sqlalchemy/test_local/e2e/test_basic.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22
import decimal
33
import os
4-
from typing import Tuple, Union
4+
from typing import Tuple, Union, List
55
from unittest import skipIf
66

77
import pytest
@@ -219,7 +219,7 @@ def test_column_comment(db_engine, metadata_obj: MetaData):
219219
connection=connection, table_name=table_name
220220
)
221221

222-
assert columns[0].get("comment") == ""
222+
assert columns[0].get("comment") == None
223223

224224
metadata_obj.drop_all(db_engine)
225225

@@ -477,7 +477,7 @@ def sample_table(metadata_obj: MetaData, db_engine: Engine):
477477

478478
table_name = "PySQLTest_{}".format(datetime.datetime.utcnow().strftime("%s"))
479479

480-
args = [
480+
args: List[Column] = [
481481
Column(colname, coltype) for colname, coltype in GET_COLUMNS_TYPE_MAP.items()
482482
]
483483

@@ -499,3 +499,45 @@ def test_get_columns(db_engine, sample_table: str):
499499
columns = inspector.get_columns(sample_table)
500500

501501
assert True
502+
503+
504+
class TestCommentReflection:
505+
@pytest.fixture(scope="class")
506+
def engine(self):
507+
HOST = os.environ.get("host")
508+
HTTP_PATH = os.environ.get("http_path")
509+
ACCESS_TOKEN = os.environ.get("access_token")
510+
CATALOG = os.environ.get("catalog")
511+
SCHEMA = os.environ.get("schema")
512+
513+
connection_string = f"databricks://token:{ACCESS_TOKEN}@{HOST}?http_path={HTTP_PATH}&catalog={CATALOG}&schema={SCHEMA}"
514+
connect_args = {"_user_agent_entry": USER_AGENT_TOKEN}
515+
516+
engine = create_engine(connection_string, connect_args=connect_args)
517+
return engine
518+
519+
@pytest.fixture
520+
def inspector(self, engine: Engine) -> Inspector:
521+
return Inspector.from_engine(engine)
522+
523+
@pytest.fixture
524+
def table(self, engine):
525+
md = MetaData()
526+
tbl = Table(
527+
"foo",
528+
md,
529+
Column("bar", String, comment="column comment"),
530+
comment="table comment",
531+
)
532+
md.create_all(bind=engine)
533+
534+
yield tbl
535+
536+
md.drop_all(bind=engine)
537+
538+
def test_table_comment_reflection(self, inspector: Inspector, table: Table):
539+
tbl_name = table.name
540+
541+
comment = inspector.get_table_comment(tbl_name)
542+
543+
assert comment == {"text": "table comment"}

src/databricks/sqlalchemy/test_local/test_ddl.py

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
import pytest
22
from sqlalchemy import Column, MetaData, String, Table, create_engine
3-
from sqlalchemy.schema import CreateTable, DropColumnComment, SetColumnComment
3+
from sqlalchemy.schema import (
4+
CreateTable,
5+
DropColumnComment,
6+
DropTableComment,
7+
SetColumnComment,
8+
SetTableComment,
9+
)
410

511

6-
class TestTableCommentDDL:
12+
class DDLTestBase:
713
engine = create_engine(
814
"databricks://token:****@****?http_path=****&catalog=****&schema=****"
915
)
1016

1117
def compile(self, stmt):
1218
return str(stmt.compile(bind=self.engine))
1319

20+
21+
class TestColumnCommentDDL(DDLTestBase):
1422
@pytest.fixture
1523
def metadata(self) -> MetaData:
1624
"""Assemble a metadata object with one table containing one column."""
@@ -45,3 +53,43 @@ def test_alter_table_drop_column_comment(self, column):
4553
stmt = DropColumnComment(column)
4654
output = self.compile(stmt)
4755
assert output == "ALTER TABLE foobar ALTER COLUMN foo COMMENT ''"
56+
57+
58+
class TestTableCommentDDL(DDLTestBase):
59+
@pytest.fixture
60+
def metadata(self) -> MetaData:
61+
"""Assemble a metadata object with one table containing one column."""
62+
metadata = MetaData()
63+
64+
col1 = Column("foo", String)
65+
col2 = Column("foo", String)
66+
tbl_w_comment = Table("martin", metadata, col1, comment="foobar")
67+
tbl_wo_comment = Table("prs", metadata, col2)
68+
69+
return metadata
70+
71+
@pytest.fixture
72+
def table_with_comment(self, metadata) -> Table:
73+
return metadata.tables.get("martin")
74+
75+
@pytest.fixture
76+
def table_without_comment(self, metadata) -> Table:
77+
return metadata.tables.get("prs")
78+
79+
def test_create_table_with_comment(self, table_with_comment):
80+
stmt = CreateTable(table_with_comment)
81+
output = self.compile(stmt)
82+
assert "USING DELTA COMMENT 'foobar'" in output
83+
84+
def test_alter_table_add_comment(self, table_without_comment: Table):
85+
table_without_comment.comment = "wireless mechanical keyboard"
86+
stmt = SetTableComment(table_without_comment)
87+
output = self.compile(stmt)
88+
89+
assert output == "COMMENT ON TABLE prs IS 'wireless mechanical keyboard'"
90+
91+
def test_alter_table_drop_comment(self, table_with_comment):
92+
"""The syntax for COMMENT ON is here: https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-comment.html"""
93+
stmt = DropTableComment(table_with_comment)
94+
output = self.compile(stmt)
95+
assert output == "COMMENT ON TABLE martin IS NULL"

0 commit comments

Comments
 (0)