
Commit ef72d74

Authored by Jesse
SQLAlchemy 2: Stop skipping some non-type tests (#247)
* Stop skipping TableDDLTest and permanently skip HasIndexTest

  We're now in the territory of features that aren't required for SQLAlchemy 2
  compatibility as of pysql==3.0.0, but we may consider adding them in the future.
  In this case, table comment reflection needs to be manually implemented. Index
  reflection would require hooking into the compiler to reflect the partition strategy.

  test_suite.py::HasIndexTest_databricks+databricks::test_has_index[dialect] SKIPPED (Databricks does not support indexes.)
  test_suite.py::HasIndexTest_databricks+databricks::test_has_index[inspector] SKIPPED (Databricks does not support indexes.)
  test_suite.py::HasIndexTest_databricks+databricks::test_has_index_schema[dialect] SKIPPED (Databricks does not support indexes.)
  test_suite.py::HasIndexTest_databricks+databricks::test_has_index_schema[inspector] SKIPPED (Databricks does not support indexes.)
  test_suite.py::TableDDLTest_databricks+databricks::test_add_table_comment SKIPPED (Comment reflection is possible but not implemented in this dialect.)
  test_suite.py::TableDDLTest_databricks+databricks::test_create_index_if_not_exists SKIPPED (Databricks does not support indexes.)
  test_suite.py::TableDDLTest_databricks+databricks::test_create_table PASSED
  test_suite.py::TableDDLTest_databricks+databricks::test_create_table_if_not_exists PASSED
  test_suite.py::TableDDLTest_databricks+databricks::test_create_table_schema PASSED
  test_suite.py::TableDDLTest_databricks+databricks::test_drop_index_if_exists SKIPPED (Databricks does not support indexes.)
  test_suite.py::TableDDLTest_databricks+databricks::test_drop_table PASSED
  test_suite.py::TableDDLTest_databricks+databricks::test_drop_table_comment SKIPPED (Comment reflection is possible but not implemented in this dialect.)
  test_suite.py::TableDDLTest_databricks+databricks::test_drop_table_if_exists PASSED
  test_suite.py::TableDDLTest_databricks+databricks::test_underscore_names PASSED

* Permanently skip QuotedNameArgumentTest with comments

  The fixes to DESCRIBE TABLE and visit_xxx were necessary to get to the point
  where I could even determine that these tests wouldn't pass, but those changes
  are not currently tested in the dialect. If, in the course of reviewing the
  remaining tests in the compliance suite, I find that these visit_xxxx methods
  are not tested anywhere else, then we should extend test_suite.py with our own
  tests to confirm the behaviour for ourselves.

* Move files from base.py to _ddl.py

  The presence of the pytest.ini file is _required_ to establish pytest's
  root_path: https://docs.pytest.org/en/7.1.x/reference/customize.html#finding-the-rootdir
  Without it, the custom pytest plugin from SQLAlchemy can't read the contents
  of setup.cfg, which makes none of the tests runnable.

* Emit a warning for certain constructs

* Stop skipping RowFetchTest

  The Date type work fixed this test failure.

* Revise infer_types logic to never infer a TINYINT

  This allows these SQLAlchemy tests to pass:

  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_bound_limit PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_bound_limit_offset PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_expr_limit_simple_offset PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_simple_limit PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_simple_limit_expr_offset PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_simple_limit_offset[cases0] PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_simple_limit_offset[cases1] PASSED
  test_suite.py::FetchLimitOffsetTest_databricks+databricks::test_simple_limit_offset[cases2] PASSED

  This partially reverts the change introduced in #246.

* Stop skipping FetchLimitOffsetTest

  I implemented our custom DatabricksStatementCompiler so we can override the
  default rendering of unbounded LIMIT clauses from `LIMIT -1` to `LIMIT ALL`.
  We also explicitly skip the FETCH clause tests since Databricks doesn't
  support this syntax. Blacked all source code here too.

* Stop skipping FutureTableDDLTest

  Add meaningful skip markers for table comment reflection and indexes.

* Stop skipping Identity column tests

  This closes #175.

* Stop skipping HasTableTest

  Adding the @reflection.cache decorator to has_table is necessary to pass
  test_has_table_cache. Caching calls to has_table improves the efficiency
  of the connector.

* Permanently skip LongNameBlowoutTest

  Databricks constraint names are limited to 255 characters.

* Stop skipping ExceptionTest

  Black test_suite.py.

* Permanently skip LastrowidTest

* Implement PRIMARY KEY and FOREIGN KEY reflection and enable tests

* Skip all IdentityColumnTest tests

  Turns out that none of these can pass for the same reason that the first two
  seemed un-runnable in db6f52b.

---------

Signed-off-by: Jesse Whitehouse <jesse.whitehouse@databricks.com>
1 parent 58ecda3 commit ef72d74

File tree

11 files changed: +483 −357 lines changed

examples/sqlalchemy.py

Lines changed: 0 additions & 1 deletion
@@ -39,7 +39,6 @@
 - Constraints: with the addition of information_schema to Unity Catalog, Databricks SQL supports
   foreign key and primary key constraints. This dialect can write these constraints but the ability
   for alembic to reflect and modify them programmatically has not been tested.
-- Delta IDENTITY columns are not yet supported.
 """
 
 import os

src/databricks/sql/utils.py

Lines changed: 8 additions & 2 deletions
@@ -534,9 +534,15 @@ def named_parameters_to_dbsqlparams_v2(parameters: List[Any]):
 
 
 def resolve_databricks_sql_integer_type(integer):
-    """Returns the smallest Databricks SQL integer type that can contain the passed integer"""
+    """Returns DbsqlType.INTEGER unless the passed int() requires a BIGINT.
+
+    Note: TINYINT is never inferred here because it is a rarely used type and clauses like LIMIT and OFFSET
+    cannot accept TINYINT bound parameter values. If you need to bind a TINYINT value, you can explicitly
+    declare its type in a DbsqlParameter object, which will bypass this inference logic."""
     if -128 <= integer <= 127:
-        return DbSqlType.TINYINT
+        # If DBR is ever updated to permit TINYINT values passed to LIMIT and OFFSET
+        # then we can change this line to return DbSqlType.TINYINT
+        return DbSqlType.INTEGER
     elif -2147483648 <= integer <= 2147483647:
        return DbSqlType.INTEGER
    else:
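
For intuition, here is a minimal runnable sketch (not from the commit) of the revised inference rule. The DbSqlType enum below is a stand-in for the connector's real enum; the point is that anything that fits in 32 bits now resolves to INTEGER, and only larger values fall through to BIGINT:

from enum import Enum


class DbSqlType(Enum):
    # Stand-in for the connector's real type enum
    TINYINT = "TINYINT"
    INTEGER = "INT"
    BIGINT = "BIGINT"


def resolve_databricks_sql_integer_type(integer: int) -> DbSqlType:
    # Never infer TINYINT: LIMIT and OFFSET reject TINYINT bound parameters.
    if -2147483648 <= integer <= 2147483647:
        return DbSqlType.INTEGER
    return DbSqlType.BIGINT


assert resolve_databricks_sql_integer_type(100) is DbSqlType.INTEGER   # previously TINYINT
assert resolve_databricks_sql_integer_type(2**40) is DbSqlType.BIGINT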

src/databricks/sqlalchemy/__init__.py

Lines changed: 109 additions & 34 deletions
@@ -1,24 +1,23 @@
-"""This module's layout loosely follows example of SQLAlchemy's postgres dialect
-"""
-
-import decimal, re, datetime
-from dateutil.parser import parse
+import re
+from typing import Any, Optional
 
 import sqlalchemy
-from sqlalchemy import types, event
-from sqlalchemy.engine import default, Engine
+from sqlalchemy import event
+from sqlalchemy.engine import Engine, default, reflection
+from sqlalchemy.engine.interfaces import (
+    ReflectedForeignKeyConstraint,
+    ReflectedPrimaryKeyConstraint,
+)
 from sqlalchemy.exc import DatabaseError, SQLAlchemyError
-from sqlalchemy.engine import reflection
 
-from databricks import sql
+import databricks.sqlalchemy._ddl as dialect_ddl_impl
 
 # This import is required to process our @compiles decorators
 import databricks.sqlalchemy._types as dialect_type_impl
-
-
-from databricks.sqlalchemy.base import (
-    DatabricksDDLCompiler,
-    DatabricksIdentifierPreparer,
+from databricks import sql
+from databricks.sqlalchemy.utils import (
+    extract_identifier_groups_from_string,
+    extract_identifiers_from_string,
 )
 
 try:
@@ -39,13 +38,16 @@ class DatabricksDialect(default.DefaultDialect):
     name: str = "databricks"
     driver: str = "databricks"
     default_schema_name: str = "default"
-    preparer = DatabricksIdentifierPreparer  # type: ignore
-    ddl_compiler = DatabricksDDLCompiler
+    preparer = dialect_ddl_impl.DatabricksIdentifierPreparer  # type: ignore
+    ddl_compiler = dialect_ddl_impl.DatabricksDDLCompiler
+    statement_compiler = dialect_ddl_impl.DatabricksStatementCompiler
     supports_statement_cache: bool = True
     supports_multivalues_insert: bool = True
     supports_native_decimal: bool = True
     supports_sane_rowcount: bool = False
     non_native_boolean_check_constraint: bool = False
+    supports_identity_columns: bool = True
+    supports_schemas: bool = True
     paramstyle: str = "named"
 
     colspecs = {
@@ -149,25 +151,43 @@ def get_columns(self, connection, table_name, schema=None, **kwargs):
 
         return columns
 
-    def get_pk_constraint(self, connection, table_name, schema=None, **kw):
+    @reflection.cache
+    def get_pk_constraint(
+        self,
+        connection,
+        table_name: str,
+        schema: Optional[str] = None,
+        **kw: Any,
+    ) -> ReflectedPrimaryKeyConstraint:
         """Return information about the primary key constraint on
         table_name`.
-
-        Given a :class:`_engine.Connection`, a string
-        `table_name`, and an optional string `schema`, return primary
-        key information as a dictionary with these keys:
-
-        constrained_columns
-          a list of column names that make up the primary key
-
-        name
-          optional name of the primary key constraint.
-
         """
-        # TODO: implement this behaviour
-        return {"constrained_columns": []}
 
-    def get_foreign_keys(self, connection, table_name, schema=None, **kw):
+        with self.get_connection_cursor(connection) as cursor:
+            # DESCRIBE TABLE EXTENDED doesn't support parameterised inputs :(
+            result = cursor.execute(f"DESCRIBE TABLE EXTENDED {table_name}").fetchall()
+
+            # DESCRIBE TABLE EXTENDED doesn't give a deterministic name to the field where
+            # a primary key constraint will be found in its output. So we cycle through its
+            # output looking for a match that includes "PRIMARY KEY". This is brittle. We
+            # could optionally make two roundtrips: the first would query information_schema
+            # for the name of the primary key constraint on this table, and a second to
+            # DESCRIBE TABLE EXTENDED, at which point we would know the name of the constraint.
+            # But for now we instead assume that Python list comprehension is faster than a
+            # network roundtrip.
+            dte_dict = {row["col_name"]: row["data_type"] for row in result}
+            target = [(k, v) for k, v in dte_dict.items() if "PRIMARY KEY" in v]
+            if target:
+                name, _constraint_string = target[0]
+                column_list = extract_identifiers_from_string(_constraint_string)
+            else:
+                name, column_list = None, None
+
+            return {"constrained_columns": column_list, "name": name}
+
+    def get_foreign_keys(
+        self, connection, table_name, schema=None, **kw
+    ) -> ReflectedForeignKeyConstraint:
         """Return information about foreign_keys in `table_name`.
 
         Given a :class:`_engine.Connection`, a string
@@ -190,8 +210,60 @@ def get_foreign_keys(self, connection, table_name, schema=None, **kw):
           a list of column names in the referred table that correspond to
           constrained_columns
         """
-        # TODO: Implement this behaviour
-        return []
+
+        with self.get_connection_cursor(connection) as cursor:
+            # DESCRIBE TABLE EXTENDED doesn't support parameterised inputs :(
+            result = cursor.execute(
+                f"DESCRIBE TABLE EXTENDED {schema + '.' if schema else ''}{table_name}"
+            ).fetchall()
+
+            # DESCRIBE TABLE EXTENDED doesn't give a deterministic name to the field where
+            # a foreign key constraint will be found in its output. So we cycle through its
+            # output looking for a match that includes "FOREIGN KEY". This is brittle. We
+            # could optionally make two roundtrips: the first would query information_schema
+            # for the name of the foreign key constraint on this table, and a second to
+            # DESCRIBE TABLE EXTENDED, at which point we would know the name of the constraint.
+            # But for now we instead assume that Python list comprehension is faster than a
+            # network roundtrip.
+            dte_dict = {row["col_name"]: row["data_type"] for row in result}
+            target = [(k, v) for k, v in dte_dict.items() if "FOREIGN KEY" in v]
+
+            def extract_constraint_dict_from_target(target):
+                if target:
+                    name, _constraint_string = target
+                    _extracted = extract_identifier_groups_from_string(_constraint_string)
+                    constrained_columns_str, referred_columns_str = (
+                        _extracted[0],
+                        _extracted[1],
+                    )
+
+                    constrained_columns = extract_identifiers_from_string(
+                        constrained_columns_str
+                    )
+                    referred_columns = extract_identifiers_from_string(referred_columns_str)
+                    referred_table = str(table_name)
+                else:
+                    name, constrained_columns, referred_columns, referred_table = (
+                        None,
+                        None,
+                        None,
+                        None,
+                    )
+
+                return {
+                    "constrained_columns": constrained_columns,
+                    "name": name,
+                    "referred_table": referred_table,
+                    "referred_columns": referred_columns,
+                }
+
+            if target:
+                return [extract_constraint_dict_from_target(i) for i in target]
+            else:
+                return []
 
     def get_indexes(self, connection, table_name, schema=None, **kw):
         """Return information about indexes in `table_name`.
@@ -238,6 +310,7 @@ def do_rollback(self, dbapi_connection):
         # Databricks SQL Does not support transactions
         pass
 
+    @reflection.cache
     def has_table(
         self, connection, table_name, schema=None, catalog=None, **kwargs
     ) -> bool:
@@ -252,7 +325,9 @@ def has_table(
 
         try:
            res = connection.execute(
-                sqlalchemy.text(f"DESCRIBE TABLE {_catalog}.{_schema}.{table_name}")
+                sqlalchemy.text(
+                    f"DESCRIBE TABLE `{_catalog}`.`{_schema}`.`{table_name}`"
+                )
            )
            return True
        except DatabaseError as e:
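
To make the reflection logic above concrete, here is a small self-contained sketch (not from the commit) of how a DESCRIBE TABLE EXTENDED result is reduced to a primary-key dict. The helper below is a hypothetical stand-in for databricks.sqlalchemy.utils.extract_identifiers_from_string, assumed here to collect backtick-quoted names:

import re


def extract_identifiers_from_string(input_str: str):
    # Hypothetical stand-in: collect every backtick-quoted identifier.
    return re.findall(r"`([^`]+)`", input_str)


# Example DESCRIBE TABLE EXTENDED output, reduced to the two relevant fields.
rows = [
    {"col_name": "id", "data_type": "bigint"},
    {"col_name": "pk_users", "data_type": "PRIMARY KEY (`id`, `region`)"},
]
dte_dict = {row["col_name"]: row["data_type"] for row in rows}
target = [(k, v) for k, v in dte_dict.items() if "PRIMARY KEY" in v]
name, constraint_string = target[0]
print({"name": name, "constrained_columns": extract_identifiers_from_string(constraint_string)})
# {'name': 'pk_users', 'constrained_columns': ['id', 'region']}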

src/databricks/sqlalchemy/_ddl.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+import re
+from sqlalchemy.sql import compiler
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
+    """https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html"""
+
+    legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
+
+    def __init__(self, dialect):
+        super().__init__(dialect, initial_quote="`")
+
+
+class DatabricksDDLCompiler(compiler.DDLCompiler):
+    def post_create_table(self, table):
+        return " USING DELTA"
+
+    def visit_unique_constraint(self, constraint, **kw):
+        logger.warn("Databricks does not support unique constraints")
+        pass
+
+    def visit_check_constraint(self, constraint, **kw):
+        logger.warn("Databricks does not support check constraints")
+        pass
+
+    def visit_identity_column(self, identity, **kw):
+        """When configuring an Identity() with Databricks, only the always option is supported.
+        All other options are ignored.
+
+        Note: IDENTITY columns must always be defined as BIGINT. An exception will be raised if INT is used.
+
+        https://www.databricks.com/blog/2022/08/08/identity-columns-to-generate-surrogate-keys-are-now-available-in-a-lakehouse-near-you.html
+        """
+        text = "GENERATED %s AS IDENTITY" % (
+            "ALWAYS" if identity.always else "BY DEFAULT",
+        )
+        return text
+
+    def get_column_specification(self, column, **kwargs):
+        """Currently we override this method only to emit a log message if a user attempts to set
+        autoincrement=True on a column. See comments in test_suite.py. We may implement implicit
+        IDENTITY using this feature in the future, similar to the Microsoft SQL Server dialect.
+        """
+        if column is column.table._autoincrement_column or column.autoincrement is True:
+            logger.warn(
+                "Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead."
+            )
+
+        return super().get_column_specification(column, **kwargs)
+
+
+class DatabricksStatementCompiler(compiler.SQLCompiler):
+    def limit_clause(self, select, **kw):
+        """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
+        since Databricks SQL doesn't support the latter.
+
+        https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-limit.html
+        """
+        text = ""
+        if select._limit_clause is not None:
+            text += "\n LIMIT " + self.process(select._limit_clause, **kw)
+        if select._offset_clause is not None:
+            if select._limit_clause is None:
+                text += "\n LIMIT ALL"
+            text += " OFFSET " + self.process(select._offset_clause, **kw)
+        return text
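
To illustrate the override, a short sketch (not part of the commit) of the statement shape that triggers it; the table and column names are made up:

from sqlalchemy import Column, Integer, MetaData, Table, select

users = Table("users", MetaData(), Column("id", Integer))
stmt = select(users).offset(10)  # OFFSET with no LIMIT

# SQLAlchemy's stock compiler renders the missing LIMIT as -1:
print(stmt.compile())
# SELECT users.id FROM users  LIMIT -1 OFFSET :param_1

# Compiled against this dialect, DatabricksStatementCompiler emits instead:
#   SELECT users.id FROM users  LIMIT ALL OFFSET :param_1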

src/databricks/sqlalchemy/base.py

Lines changed: 0 additions & 17 deletions
This file was deleted.

src/databricks/sqlalchemy/pytest.ini

Whitespace-only changes.

src/databricks/sqlalchemy/requirements.py

Lines changed: 50 additions & 0 deletions
@@ -9,6 +9,7 @@
 in test_suite.py with a Databricks-specific reason.
 
 See the special note about the array_type exclusion below.
+See special note about has_temp_table exclusion below.
 """
 
 import sqlalchemy.testing.requirements
@@ -93,4 +94,53 @@ def array_type(self):
         test runner will crash the pytest process due to an AttributeError
         """
 
+        # TODO: Implement array type using inline?
         return sqlalchemy.testing.exclusions.closed()
+
+    @property
+    def table_ddl_if_exists(self):
+        """target platform supports IF NOT EXISTS / IF EXISTS for tables."""
+
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def identity_columns(self):
+        """If a backend supports GENERATED { ALWAYS | BY DEFAULT }
+        AS IDENTITY"""
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def identity_columns_standard(self):
+        """If a backend supports GENERATED { ALWAYS | BY DEFAULT }
+        AS IDENTITY with a standard syntax.
+        This is mainly to exclude MSSql.
+        """
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def has_temp_table(self):
+        """target dialect supports checking a single temp table name
+
+        unfortunately this is not the same as temp_table_names
+
+        SQLAlchemy's HasTableTest is not normalised in such a way that temp table tests
+        are separate from temp view and normal table tests. If those tests were split out,
+        we would just add detailed skip markers in test_suite.py. But since we'd like to
+        run the HasTableTest group for the features we support, we must set this exclusion
+        to closed().
+
+        It would be ideal if there were a separate requirement for has_temp_view. Without it,
+        we're in a bind.
+        """
+        return sqlalchemy.testing.exclusions.closed()
+
+    @property
+    def temporary_views(self):
+        """target database supports temporary views"""
+        return sqlalchemy.testing.exclusions.open()
+
+    @property
+    def views(self):
+        """Target database must support VIEWs."""
+
+        return sqlalchemy.testing.exclusions.open()
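
For context, a minimal sketch (an illustration under assumptions, not the project's actual file) of how these properties gate the compliance suite: SQLAlchemy's test plugin checks each @testing.requires.<name> marker against the dialect's requirements class, and open()/closed() decide whether the marked tests run or are skipped:

import sqlalchemy.testing.requirements
from sqlalchemy.testing import exclusions


class Requirements(sqlalchemy.testing.requirements.SuiteRequirements):
    @property
    def views(self):
        # open(): tests marked @testing.requires.views will run
        return exclusions.open()

    @property
    def has_temp_table(self):
        # closed(): the whole marked group is skipped for this dialect
        return exclusions.closed()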
