diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4efaba0c..f4ec73e7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -148,16 +148,14 @@ The suites marked `[not documented]` require additional configuration which will
 
 SQLAlchemy provides reusable tests for testing dialect implementations.
 
-To run these tests, assuming the environment variables needed for e2e tests are set, do the following:
-
 ```
-cd src/databricks/sqlalchemy
-poetry run python -m pytest test/sqlalchemy_dialect_compliance.py --dburi \
+poetry shell
+cd src/databricks/sqlalchemy/test
+python -m pytest test_suite.py --dburi \
     "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema"
 ```
 
-Some of these of these tests fail currently. We're working on getting
-relavent tests passing and others skipped.
+Some of these tests currently fail. We're working on getting relevant tests passing and others skipped.
 
 ### Code formatting
diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py
index d1d4782d..f701d8a8 100644
--- a/src/databricks/sqlalchemy/__init__.py
+++ b/src/databricks/sqlalchemy/__init__.py
@@ -13,7 +13,7 @@ from databricks import sql
 
 # This import is required to process our @compiles decorators
-import databricks.sqlalchemy.types
+import databricks.sqlalchemy._types as dialect_type_impl
 
 from databricks.sqlalchemy.base import (
@@ -48,6 +48,12 @@ class DatabricksDialect(default.DefaultDialect):
     non_native_boolean_check_constraint: bool = False
     paramstyle: str = "named"
 
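+    # These colspecs route bind and result processing for three generic
+    # SQLAlchemy types through the TypeDecorator implementations in _types.py.
+    # Illustrative example (hypothetical column, assuming an engine bound to
+    # this dialect): Column("ts", sqlalchemy.types.DateTime) renders as
+    # TIMESTAMP_NTZ in DDL and is returned as a timezone-naive
+    # datetime.datetime rather than one carrying an 'Etc/UTC' tzinfo.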
+    colspecs = {
+        sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType,
+        sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType,
+        sqlalchemy.types.String: dialect_type_impl.DatabricksStringType,
+    }
+
     @classmethod
     def dbapi(cls):
         return sql
@@ -130,7 +136,6 @@ def get_columns(self, connection, table_name, schema=None, **kwargs):
 
         columns = []
         for col in resp:
-            # Taken from PyHive. This removes added type info from decimals and maps
            _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0)
            this_column = {
@@ -277,6 +282,13 @@ def get_schema_names(self, connection, **kw):
         # TODO: replace with call to cursor.schemas() once its performance matches raw SQL
         return [row[0] for row in connection.execute("SHOW SCHEMAS")]
+
+    @classmethod
+    def load_provisioning(cls):
+        try:
+            __import__("databricks.sqlalchemy.provision")
+        except ImportError:
+            pass
 
 
 @event.listens_for(Engine, "do_connect")
diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py
new file mode 100644
index 00000000..3fdfef74
--- /dev/null
+++ b/src/databricks/sqlalchemy/_types.py
@@ -0,0 +1,213 @@
+import sqlalchemy
+from sqlalchemy.ext.compiler import compiles
+
+from typing import Union
+
+from datetime import datetime, time
+
+
+from databricks.sql.utils import ParamEscaper
+
+
+@compiles(sqlalchemy.types.Enum, "databricks")
+@compiles(sqlalchemy.types.String, "databricks")
+@compiles(sqlalchemy.types.Text, "databricks")
+@compiles(sqlalchemy.types.Time, "databricks")
+@compiles(sqlalchemy.types.Unicode, "databricks")
+@compiles(sqlalchemy.types.UnicodeText, "databricks")
+@compiles(sqlalchemy.types.Uuid, "databricks")
+def compile_string_databricks(type_, compiler, **kw):
+    """
+    We override the default compilation for Enum(), String(), Text(), Time(), Unicode(),
+    UnicodeText(), and Uuid() because SQLAlchemy defaults to incompatible / abnormal compiled names
+
+    Enum -> VARCHAR
+    String -> VARCHAR[LENGTH]
+    Text -> VARCHAR[LENGTH]
+    Time -> TIME
+    Unicode -> VARCHAR[LENGTH]
+    UnicodeText -> TEXT
+    Uuid -> CHAR[32]
+
+    But all of these types will be compiled to STRING in Databricks SQL
+    """
+    return "STRING"
+
+
+@compiles(sqlalchemy.types.Integer, "databricks")
+def compile_integer_databricks(type_, compiler, **kw):
+    """
+    We need to override the default Integer compilation rendering because Databricks uses "INT" instead of "INTEGER"
+    """
+    return "INT"
+
+
+@compiles(sqlalchemy.types.LargeBinary, "databricks")
+def compile_binary_databricks(type_, compiler, **kw):
+    """
+    We need to override the default LargeBinary compilation rendering because Databricks uses "BINARY" instead of "BLOB"
+    """
+    return "BINARY"
+
+
+@compiles(sqlalchemy.types.Numeric, "databricks")
+def compile_numeric_databricks(type_, compiler, **kw):
+    """
+    We need to override the default Numeric compilation rendering because Databricks uses "DECIMAL" instead of "NUMERIC"
+
+    The built-in visit_DECIMAL behaviour captures the precision and scale. Here we're just mapping calls to compile Numeric
+    to the SQLAlchemy Decimal() implementation
+    """
+    return compiler.visit_DECIMAL(type_, **kw)
+
+
+@compiles(sqlalchemy.types.DateTime, "databricks")
+def compile_datetime_databricks(type_, compiler, **kw):
+    """
+    We need to override the default DateTime compilation rendering because Databricks renders
+    this type as "TIMESTAMP_NTZ" (timezone-naive) rather than SQLAlchemy's default "DATETIME"
+    """
+    return "TIMESTAMP_NTZ"
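+
+
+# Illustrative only (hypothetical table, not executed here): with the overrides
+# above, DDL compiled against this dialect should use Databricks type names, e.g.
+#
+#   Table("t", MetaData(), Column("a", Integer), Column("b", String), Column("c", DateTime))
+#   # -> CREATE TABLE t (a INT, b STRING, c TIMESTAMP_NTZ)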
+
+
+@compiles(sqlalchemy.types.ARRAY, "databricks")
+def compile_array_databricks(type_, compiler, **kw):
+    """
+    SQLAlchemy's default ARRAY can't compile as it's only implemented for PostgreSQL.
+    The PostgreSQL implementation works for Databricks SQL, so we duplicate that here.
+
+    :type_:
+        This is an instance of sqlalchemy.types.ARRAY which always includes an item_type attribute
+        which is itself an instance of TypeEngine
+
+    https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
+    """
+
+    inner = compiler.process(type_.item_type, **kw)
+
+    return f"ARRAY<{inner}>"
+
+
+class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator):
+    """The datetime that pysql creates when it receives the contents of a TIMESTAMP_NTZ
+    includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that
+    the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any
+    timezone set, we need to strip the timezone off the value received from pysql.
+
+    It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug.
+    """
+
+    impl = sqlalchemy.types.DateTime
+
+    cache_ok = True
+
+    def process_result_value(self, value: Union[None, datetime], dialect):
+        if value is None:
+            return None
+        return value.replace(tzinfo=None)
+
+
+class DatabricksTimeType(sqlalchemy.types.TypeDecorator):
+    """Databricks has no native TIME type. So we store it as a string."""
+
+    impl = sqlalchemy.types.Time
+    cache_ok = True
+
+    TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
+    TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"
+
+    def process_bind_param(self, value: Union[time, None], dialect) -> Union[str, None]:
+        """Values sent to the database are converted to "%H:%M:%S.%f" strings."""
+        if value is None:
+            return None
+        return value.strftime(self.TIME_WITH_MICROSECONDS_FMT)
+
+    def process_literal_param(self, value, dialect) -> time:
+        """It's not clear to me why this is necessary. Without it, SQLAlchemy's TimeTest.test_literal fails
+        because the string literal renderer receives a str() object and calls .isoformat() on it.
+
+        Whereas this method receives a datetime.time() object which is subsequently passed to that
+        same renderer. And that works.
+
+        UPDATE: After coping with the literal_processor override in DatabricksStringType, I suspect a similar
+        mechanism is at play. Two different processors are called in sequence. This is likely a byproduct
+        of Databricks not having a true TIME type. I think the string representation of Time() types is
+        somehow affecting the literal rendering process. But as long as this passes the tests, I'm not
+        worried about it.
+        """
+        return value
+
+    def process_result_value(
+        self, value: Union[None, str], dialect
+    ) -> Union[time, None]:
+        """Values received from the database are parsed into datetime.time() objects"""
+        if value is None:
+            return None
+
+        try:
+            _parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT)
+        except ValueError:
+            # If the string doesn't have microseconds, try parsing it without them
+            _parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT)
+
+        return _parsed.time()
+
+
+class DatabricksStringType(sqlalchemy.types.TypeDecorator):
+    """We have to implement our own String() type because SQLAlchemy's default implementation
+    wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for
+    escaping of literal strings. And SQLAlchemy's default escaping breaks Databricks SQL.
+    """
+
+    impl = sqlalchemy.types.String
+    cache_ok = True
+    pe = ParamEscaper()
+
+    def process_literal_param(self, value, dialect) -> str:
+        """SQLAlchemy's default string escaping for backslashes doesn't work for Databricks. The logic here
+        implements the same logic as our legacy inline escaping logic.
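+
+        A rough sketch of the intent (the rendering shown is illustrative; the
+        exact output is whatever ParamEscaper.escape_string() produces):
+
+            process_literal_param("it's", dialect)  # -> backslash-escaped, e.g. it\'s
+            # SQLAlchemy's default String type would instead double the quote: it''s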
+ """ + + return self.pe.escape_string(value) + + def literal_processor(self, dialect): + """We manually override this method to prevent further processing of the string literal beyond + what happens in the process_literal_param() method. + + The SQLAlchemy docs _specifically_ say to not override this method. + + It appears that any processing that happens from TypeEngine.process_literal_param happens _before_ + and _in addition to_ whatever the class's impl.literal_processor() method does. The String.literal_processor() + method performs a string replacement that doubles any single-quote in the contained string. This raises a syntax + error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need. + + We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong. + + See type_api.py::TypeEngine.literal_processor: + + ```python + def process(value: Any) -> str: + return fixed_impl_processor( + fixed_process_literal_param(value, dialect) + ) + ``` + + That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the + process_literal_param defined in our Databricks dialect) + + https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor + """ + + def process(value): + """This is a copy of the default String.literal_processor() method but stripping away + its double-escaping behaviour for single-quotes. + """ + + _step1 = self.process_literal_param(value, dialect="databricks") + if dialect.identifier_preparer._double_percents: + _step2 = _step1.replace("%", "%%") + else: + _step2 = _step1 + + return "%s" % _step2 + + return process diff --git a/src/databricks/sqlalchemy/provision.py b/src/databricks/sqlalchemy/provision.py new file mode 100644 index 00000000..55e9f3a0 --- /dev/null +++ b/src/databricks/sqlalchemy/provision.py @@ -0,0 +1,12 @@ +from sqlalchemy.testing.provision import create_db, drop_db + +@create_db.for_db("databricks") +def _databricks_create_db(cfg, eng, ident): + with eng.begin() as conn: + create_string = "CREATE SCHEMA `main`.`%s`" % ident + conn.exec_driver_sql(create_string) + +@drop_db.for_db("databricks") +def _databricks_drop_db(cfg, eng, ident): + with eng.begin() as conn: + conn.exec_driver_sql("DROP SCHEMA `main`.`%s`" % ident) \ No newline at end of file diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 7da46005..0e3d2ffb 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -1,20 +1,4 @@ """ -This module is supposedly used by the compliance tests to control which tests are run based on database capabilities. -However, based on some experimentation that does not appear to be consistently the case. Until we better understand -when these requirements are and are not implemented, we prefer to manually capture the exact nature of the failures -and errors. 
-
-Once we better understand how to use requirements.py, an example exclusion will look like this:
-
-    import sqlalchemy.testing.requirements
-    import sqlalchemy.testing.exclusions
-
-    class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
-        @property
-        def __some_example_requirement(self):
-            return sqlalchemy.testing.exclusions.closed
-
-
 The complete list of requirements is provided by SQLAlchemy here:
 
 https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py
@@ -23,12 +7,107 @@ def __some_example_requirement(self):
 import sqlalchemy.testing.requirements
 import sqlalchemy.testing.exclusions
-import logging
 
+class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
+    @property
+    def date_historic(self):
+        """target dialect supports representation of Python
+        datetime.date() objects with historic (pre 1970) values."""
+
+        return sqlalchemy.testing.exclusions.open()
 
-logger = logging.getLogger(__name__)
+    @property
+    def datetime_historic(self):
+        """target dialect supports representation of Python
+        datetime.datetime() objects with historic (pre 1970) values."""
 
-logger.warning("requirements.py is not currently employed by Databricks dialect")
+        return sqlalchemy.testing.exclusions.open()
 
+    @property
+    def datetime_literals(self):
+        """target dialect supports rendering of a date, time, or datetime as a
+        literal string, e.g. via the TypeEngine.literal_processor() method.
 
-class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
-    pass
+        """
+
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def timestamp_microseconds(self):
+        """target dialect supports representation of Python
+        datetime.datetime() with microsecond objects but only
+        if TIMESTAMP is used."""
+
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def time_microseconds(self):
+        """target dialect supports representation of Python
+        datetime.time() with microsecond objects.
+
+        This requirement declaration isn't needed but I've included it here for completeness.
+        Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns
+        as STRING Databricks data types. And we use a custom time type to convert those values
+        between str() and datetime.time() representations. Therefore we can store _any_ precision
+        that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects
+        except mssql, mysql, mariadb, and oracle.
+        """
+
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def precision_generic_float_type(self):
+        """target backend will return native floating point numbers with at
+        least seven decimal places when using the generic Float type.
+
+        Databricks sometimes only returns six digits of precision for the generic Float type
+        """
+        return sqlalchemy.testing.exclusions.closed()
+
+    @property
+    def literal_float_coercion(self):
+        """target backend will return the exact float value 15.7563
+        with only four significant digits from this statement:
+
+        SELECT :param
+
+        where :param is the Python float 15.7563
+
+        i.e. it does not return 15.75629997253418
+
+        Without additional work, Databricks returns 15.75629997253418
+        This is a potential area where we could override the Float literal processor.
+        Will leave to a PM to decide if we should do so.
+        """
+        return sqlalchemy.testing.exclusions.closed()
+
+    @property
+    def precision_numerics_enotation_large(self):
+        """target backend supports Decimal() objects using E notation
+        to represent very large values.
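+
+        (As a point of reference, SQLAlchemy's suite exercises this with values
+        like Decimal("4E+8") in test_enotation_decimal_large.)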
+ + Databricks supports E notation for FLOAT data types but not for DECIMAL types, + which is the underlying data type SQLAlchemy uses for Numeric() types. + + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def infinity_floats(self): + """The Float type can persist and load float('inf'), float('-inf').""" + + return sqlalchemy.testing.exclusions.open() + + @property + def precision_numerics_retains_significant_digits(self): + """A precision numeric type will return empty significant digits, + i.e. a value such as 10.000 will come back in Decimal form with + the .000 maintained.""" + + return sqlalchemy.testing.exclusions.open() + + @property + def array_type(self): + """While Databricks does support ARRAY types, pysql cannot bind them. So + we cannot use them with SQLAlchemy""" + + return sqlalchemy.testing.exclusions.closed() diff --git a/src/databricks/sqlalchemy/setup.cfg b/src/databricks/sqlalchemy/setup.cfg index ab89d17d..81c7095e 100644 --- a/src/databricks/sqlalchemy/setup.cfg +++ b/src/databricks/sqlalchemy/setup.cfg @@ -2,3 +2,6 @@ [sqla_testing] requirement_cls=databricks.sqlalchemy.requirements:Requirements profile_file=profiles.txt + +[db] +databricks= \ No newline at end of file diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 7a840404..055720ac 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -24,150 +24,9 @@ # See further: https://github.com/sqlalchemy/sqlalchemy/blob/rel_1_4_48/README.dialects.rst +@pytest.mark.skip(reason="pysql doesn't support binding of BINARY type parameters") class BinaryTest(BinaryTest): - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_binary_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ - - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_pickle_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ - - -class DateHistoricTest(DateHistoricTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(1727, 4, 1)" with datatype DATE - """ - - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '1727-04-01' != datetime.date(1727, 4, 1) - """ - - -class DateTest(DateTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(2012, 10, 15)" with datatype DATE - """ - - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." 
- ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15' != datetime.date(2012, 10, 15) - """ - - -class DateTimeHistoricTest(DateTimeHistoricTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(1850, 11, 10, 11, 52, 35)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '1850-11-10 11:52:35.000000' != datetime.datetime(1850, 11, 10, 11, 52, 35) - """ - - -class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ - - -class DateTimeTest(DateTimeTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - """ + pass class FetchLimitOffsetTest(FetchLimitOffsetTest): @@ -292,80 +151,6 @@ def test_long_convention_name(self): """ -class NumericTest(NumericTest): - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." 
- ) - def test_decimal_coerce_round_trip_w_cast(self): - """ - Exception: - AssertionError: Decimal('16') != Decimal('15.7563') - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('1')} != {Decimal('0.70000000000696'), Decimal('1E-7'), Decimal('0.00001'), Decimal('6.96E-12'), Decimal('0.001'), Decimal('5.940696E-8'), Decimal('0.01000005940696'), Decimal('1E-8'), Decimal('0.01'), Decimal('0.000001'), Decimal('0.0001'), Decimal('6.96E-10')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal_large(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DOUBLE" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_float_custom_scale(self): - """ - Exception: - AssertionError: {Decimal('15.7563829')} != {Decimal('15.7563827')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_many_significant_digits(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DECIMAL(22,2)" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_decimal(self): - """ - Exception: - AssertionError: {Decimal('16')} != {Decimal('15.7563')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_float(self): - """ - Exception: - AssertionError: {16.0} != {15.7563} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_precision_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('900'), Decimal('54')} != {Decimal('0.004354'), Decimal('900.0'), Decimal('54.234246451650')} - """ - - class RowFetchTest(RowFetchTest): @pytest.mark.skip( reason="Date type implementation needs work. Timezone information not preserved." @@ -377,232 +162,6 @@ def test_row_w_scalar_select(self): """ -class StringTest(StringTest): - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - - -class TextTest(TextTest): - """Fixing StringTest should fix these failures also.""" - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." 
- ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - - -class TimeMicrosecondsTest(TimeMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18, 396)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - -class TimeTest(TimeTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - -class TimestampMicrosecondsTest(TimestampMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." 
- ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype TIMESTAMP - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ - - -class DateTimeCoercedToDateTimeTest(DateTimeCoercedToDateTimeTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Literal values not coerced properly." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - assert '2012-10-15 12:57:18.000000' == datetime.datetime(2012, 10, 15, 12, 57, 18) - """ - - @pytest.mark.skip(reason="Forthcoming deprecated feature.") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null_bound_comparison(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip_decorated(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): @@ -736,42 +295,6 @@ def test_numeric_reflection(self): """ -class BooleanTest(BooleanTest): - @pytest.mark.skip(reason="Boolean type needs work.") - def test_null(self): - """ - This failure appears to infrastructure based. Should attempt a re-run. 
- Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_render_literal_bool(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - _ ERROR at setup of BooleanTest_databricks+databricks.test_render_literal_bool _ - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_whereclause(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - """ - pass - - class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. 
Requires investigation.") def test_round_trip_same_named_column(self): diff --git a/src/databricks/sqlalchemy/test_local/test_types.py b/src/databricks/sqlalchemy/test_local/test_types.py index 91f11e17..f7423f69 100644 --- a/src/databricks/sqlalchemy/test_local/test_types.py +++ b/src/databricks/sqlalchemy/test_local/test_types.py @@ -36,12 +36,12 @@ class DatabricksDataType(enum.Enum): sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY, sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN, sqlalchemy.types.Date: DatabricksDataType.DATE, - sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Double: DatabricksDataType.DOUBLE, sqlalchemy.types.Enum: DatabricksDataType.STRING, sqlalchemy.types.Float: DatabricksDataType.FLOAT, sqlalchemy.types.Integer: DatabricksDataType.INT, - sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL, sqlalchemy.types.PickleType: DatabricksDataType.BINARY, sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT, diff --git a/src/databricks/sqlalchemy/types.py b/src/databricks/sqlalchemy/types.py deleted file mode 100644 index 4b10fc6f..00000000 --- a/src/databricks/sqlalchemy/types.py +++ /dev/null @@ -1,80 +0,0 @@ -import sqlalchemy -from sqlalchemy.ext.compiler import compiles - - -@compiles(sqlalchemy.types.Enum, "databricks") -@compiles(sqlalchemy.types.String, "databricks") -@compiles(sqlalchemy.types.Text, "databricks") -@compiles(sqlalchemy.types.Time, "databricks") -@compiles(sqlalchemy.types.Unicode, "databricks") -@compiles(sqlalchemy.types.UnicodeText, "databricks") -@compiles(sqlalchemy.types.Uuid, "databricks") -def compile_string_databricks(type_, compiler, **kw): - """ - We override the default compilation for Enum(), String(), Text(), and Time() because SQLAlchemy - defaults to incompatible / abnormal compiled names - - Enum -> VARCHAR - String -> VARCHAR[LENGTH] - Text -> VARCHAR[LENGTH] - Time -> TIME - Unicode -> VARCHAR[LENGTH] - UnicodeText -> TEXT - Uuid -> CHAR[32] - - But all of these types will be compiled to STRING in Databricks SQL - """ - return "STRING" - - -@compiles(sqlalchemy.types.Integer, "databricks") -def compile_integer_databricks(type_, compiler, **kw): - """ - We need to override the default Integer compilation rendering because Databricks uses "INT" instead of "INTEGER" - """ - return "INT" - - -@compiles(sqlalchemy.types.LargeBinary, "databricks") -def compile_binary_databricks(type_, compiler, **kw): - """ - We need to override the default LargeBinary compilation rendering because Databricks uses "BINARY" instead of "BLOB" - """ - return "BINARY" - - -@compiles(sqlalchemy.types.Numeric, "databricks") -def compile_numeric_databricks(type_, compiler, **kw): - """ - We need to override the default Numeric compilation rendering because Databricks uses "DECIMAL" instead of "NUMERIC" - - The built-in visit_DECIMAL behaviour captures the precision and scale. 
Here we're just mapping calls to compile Numeric - to the SQLAlchemy Decimal() implementation - """ - return compiler.visit_DECIMAL(type_, **kw) - - -@compiles(sqlalchemy.types.DateTime, "databricks") -def compile_datetime_databricks(type_, compiler, **kw): - """ - We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP" instead of "DATETIME" - """ - return "TIMESTAMP" - - -@compiles(sqlalchemy.types.ARRAY, "databricks") -def compile_array_databricks(type_, compiler, **kw): - """ - SQLAlchemy's default ARRAY can't compile as it's only implemented for Postgresql. - The Postgres implementation works for Databricks SQL, so we duplicate that here. - - :type_: - This is an instance of sqlalchemy.types.ARRAY which always includes an item_type attribute - which is itself an instance of TypeEngine - - https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY - """ - - inner = compiler.process(type_.item_type, **kw) - - return f"ARRAY<{inner}>"