|
| 1 | +""" |
| 2 | +databricks-sql-connector includes a SQLAlchemy dialect compatible with Databricks SQL. |
| 3 | +It aims to be a drop-in replacement for the crflynn/sqlalchemy-databricks project, that implements |
| 4 | +more of the Databricks API, particularly around table reflection, Alembic usage, and data |
| 5 | +ingestion with pandas. |
| 6 | +
|
| 7 | +Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/*** |
| 8 | +
|
| 9 | +Because of the extent of SQLAlchemy's capabilities it isn't feasible to provide examples of every |
| 10 | +usage in a single script, so we only provide a basic one here. More examples are found in our test |
| 11 | +suite at tests/e2e/sqlalchemy/test_basic.py and in the PR that implements this change: |
| 12 | +
|
| 13 | +https://github.com/databricks/databricks-sql-python/pull/57 |
| 14 | +
|
| 15 | +# What's already supported |
| 16 | +
|
| 17 | +Most of the functionality is demonstrated in the e2e tests mentioned above. The list below was |
| 18 | +derived from those test method names: |
| 19 | +
|
| 20 | + - Create and drop tables with SQLAlchemy Core |
| 21 | + - Create and drop tables with SQLAlchemy ORM |
| 22 | + - Read created tables via reflection |
| 23 | + - Modify column nullability |
| 24 | + - Insert records manually |
| 25 | + - Insert records with pandas.to_sql (note that this does not work for DataFrames with indexes) |
| 26 | +
|
| 27 | +This connector also aims to support Alembic for programmatic delta table schema maintenance. This |
| 28 | +behaviour is not yet backed by integration tests, which will follow in a subsequent PR as we learn |
| 29 | +more about customer use cases there. That said, the following behaviours have been tested manually: |
| 30 | +
|
| 31 | + - Autogenerate revisions with alembic revision --autogenerate |
| 32 | + - Upgrade and downgrade between revisions with `alembic upgrade <revision hash>` and |
| 33 | + `alembic downgrade <revision hash>` |
| 34 | +
|
| 35 | +# Known Gaps |
| 36 | + - MAP, ARRAY, and STRUCT types: this dialect can read these types out as strings. But you cannot |
| 37 | + define a SQLAlchemy model with databricks.sqlalchemy.dialect.types.DatabricksMap (e.g.) because |
| 38 | + we haven't implemented them yet. |
| 39 | + - Constraints: with the addition of information_schema to Unity Catalog, Databricks SQL supports |
| 40 | + foreign key and primary key constraints. This dialect can write these constraints but the ability |
| 41 | + for alembic to reflect and modify them programmatically has not been tested. |
| 42 | +""" |
| 43 | + |
| 44 | +import os |
| 45 | +import sqlalchemy |
| 46 | +from sqlalchemy.orm import Session |
| 47 | +from sqlalchemy import Column, String, Integer, BOOLEAN, create_engine, select |
| 48 | + |
| 49 | +try: |
| 50 | + from sqlalchemy.orm import declarative_base |
| 51 | +except ImportError: |
| 52 | + from sqlalchemy.ext.declarative import declarative_base |
| 53 | + |
# Connection parameters are read from the environment so credentials are never
# hard-coded into the example script.
host = os.getenv("MY_SERVER_HOSTNAME")
http_path = os.getenv("MY_HTTP_PATH")
access_token = os.getenv("MY_TOKEN")
catalog = os.getenv("MY_CATALOG")
# Previously hard-coded to a personal schema name; the fallback keeps the
# original behaviour when MY_SCHEMA is unset.
schema = os.getenv("MY_SCHEMA", "jprakash")
| 59 | + |
| 60 | + |
# Arguments beyond the DBAPI standard are forwarded verbatim to the driver;
# see thrift_backend.py for the complete list of accepted keys.
extra_connect_args = {
    "_tls_verify_hostname": True,
    "_user_agent_entry": "PySQL Example Script",
}

if sqlalchemy.__version__.startswith("1.3"):
    # SQLAlchemy 1.3.x cannot parse http_path, catalog, and schema out of the
    # URL query string, so those three values travel via connect_args instead.
    engine = create_engine(
        f"databricks://token:{access_token}@{host}",
        connect_args={
            **extra_connect_args,
            "catalog": catalog,
            "schema": schema,
            "http_path": http_path,
        },
    )
else:
    # 1.4+ understands the full URL form directly.
    query = f"http_path={http_path}&catalog={catalog}&schema={schema}"
    engine = create_engine(
        f"databricks://token:{access_token}@{host}?{query}",
        connect_args=extra_connect_args,
    )
| 81 | + |
# NOTE(review): declarative_base(bind=...) is deprecated in SQLAlchemy 1.4 and
# removed in 2.0; this example targets 1.3/1.4 — confirm before upgrading.
session = Session(bind=engine)
base = declarative_base(bind=engine)
| 84 | + |
| 85 | + |
class SampleObject(base):
    """Minimal ORM model used to exercise create/insert/select/drop below."""

    __tablename__ = "mySampleTable"

    # SQLAlchemy's ORM requires a primary key on every mapped class, so one
    # column is marked even in this toy example.
    name = Column(String(255), primary_key=True)
    episodes = Column(Integer)
    some_bool = Column(BOOLEAN)
| 93 | + |
| 94 | + |
# Pass the engine explicitly: this works whether or not the declarative base
# was bound at construction time (bound metadata is deprecated in
# SQLAlchemy 1.4+).
base.metadata.create_all(bind=engine)

sample_object_1 = SampleObject(name="Bim Adewunmi", episodes=6, some_bool=True)
sample_object_2 = SampleObject(name="Miki Meek", episodes=12, some_bool=False)

# add_all stages both INSERTs; commit flushes them in one transaction.
session.add_all([sample_object_1, sample_object_2])
session.commit()

# SQLAlchemy 1.3 expects a list of entities in select() and yields Row objects
# from execute(); 1.4+ takes the entity directly and offers scalars().
if sqlalchemy.__version__.startswith("1.3"):
    stmt = select([SampleObject]).where(
        SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"])
    )
    output = list(session.execute(stmt))
else:
    stmt = select(SampleObject).where(
        SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"])
    )
    output = list(session.scalars(stmt))

assert len(output) == 2

# Clean up the demo table; bind passed explicitly for the same reason as above.
base.metadata.drop_all(bind=engine)
0 commit comments