From 18343559598c2ba05ad30780a1de38086b883f57 Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Thu, 10 Jul 2025 05:21:44 +0200 Subject: [PATCH 01/11] snapshot --- src/sqlacodegen/risclog_generators.py | 163 ++++++++++++++++++-------- 1 file changed, 114 insertions(+), 49 deletions(-) diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 98a8efb1..119b3208 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -642,64 +642,123 @@ def clx_generate_base(self: "TablesGenerator") -> None: TablesGenerator.generate_base = clx_generate_base # type: ignore[method-assign] -def clx_render_index(self: "TablesGenerator", index: Index) -> str: - elements = [] - opclass_map = {} - - if index.columns: - for col in index.columns: - elements.append(repr(col.name)) +# def clx_render_index(self: "TablesGenerator", index: Index) -> str: +# elements = [] +# opclass_map = {} + +# if index.columns: +# for col in index.columns: +# elements.append(repr(col.name)) + +# if ( +# "postgresql" in index.dialect_options +# and index.dialect_options["postgresql"].get("using") == "gin" +# and hasattr(col, "type") +# ): +# coltype = getattr(col.type, "python_type", None) +# if isinstance( +# col.type, (satypes.String, satypes.Text, satypes.Unicode) +# ) or (coltype and coltype is str): +# opclass_map[col.name] = "gin_trgm_ops" + +# elif getattr(index, "expressions", None): +# for expr in index.expressions: +# expr_str = str(expr).strip() +# elements.append(f"text({expr_str!r})") + +# if ( +# "postgresql" in index.dialect_options +# and index.dialect_options["postgresql"].get("using") == "gin" +# ): +# if ( +# "::tsvector" not in expr_str +# and "array" not in expr_str.lower() +# and "json" not in expr_str.lower() +# ): +# opclass_map[expr_str] = "gin_trgm_ops" + +# if not elements: +# print( +# f"# WARNING: Skipped index {getattr(index, 'name', None)!r} on table {getattr(index.table, 'name', None)!r} (no columns or expressions)." +# ) +# return "" + +# kwargs: dict[str, Any] = {} + +# if index.unique: +# kwargs["unique"] = True + +# if "postgresql" in index.dialect_options: +# dialect_opts = index.dialect_options["postgresql"] +# if "using" in dialect_opts: +# using = dialect_opts["using"] +# kwargs["postgresql_using"] = ( +# f"'{using}'" if isinstance(using, str) else using +# ) + +# if opclass_map: +# kwargs["postgresql_ops"] = opclass_map + +# return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) + +RAW_SQL_INDEXES = [] - if ( - "postgresql" in index.dialect_options - and index.dialect_options["postgresql"].get("using") == "gin" - and hasattr(col, "type") - ): +def clx_render_index(self: "TablesGenerator", index: Index) -> str: + """ + Render SQLAlchemy Index for ORM if possible, otherwise collect as RAW SQL. + """ + # Normale Spaltenindizes + if index.columns and all(hasattr(col, "name") for col in index.columns): + opclass_map = {} + elements = [repr(col.name) for col in index.columns] + # GIN + Operator für Textspalten + if ( + "postgresql" in index.dialect_options + and index.dialect_options["postgresql"].get("using") == "gin" + ): + for col in index.columns: coltype = getattr(col.type, "python_type", None) - if isinstance( - col.type, (satypes.String, satypes.Text, satypes.Unicode) - ) or (coltype and coltype is str): - opclass_map[col.name] = "gin_trgm_ops" - - elif getattr(index, "expressions", None): - for expr in index.expressions: - expr_str = str(expr).strip() - elements.append(f"text({expr_str!r})") - - if ( - "postgresql" in index.dialect_options - and index.dialect_options["postgresql"].get("using") == "gin" - ): if ( - "::tsvector" not in expr_str - and "array" not in expr_str.lower() - and "json" not in expr_str.lower() + isinstance(col.type, (satypes.String, satypes.Text, satypes.Unicode)) + or (coltype and coltype is str) ): - opclass_map[expr_str] = "gin_trgm_ops" + opclass_map[col.name] = "gin_trgm_ops" - if not elements: - print( - f"# WARNING: Skipped index {getattr(index, 'name', None)!r} on table {getattr(index.table, 'name', None)!r} (no columns or expressions)." + kwargs = {} + if index.unique: + kwargs["unique"] = True + if "postgresql" in index.dialect_options: + using = index.dialect_options["postgresql"].get("using") + if using: + kwargs["postgresql_using"] = f"'{using}'" + if opclass_map: + kwargs["postgresql_ops"] = opclass_map + + return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) + + # Ausdrücke im Index? (text/func etc.) + expressions = getattr(index, "expressions", None) + if expressions: + is_gin = ( + "postgresql" in index.dialect_options + and index.dialect_options["postgresql"].get("using") == "gin" ) + for expr in expressions: + expr_str = str(expr).strip() + table_name = getattr(index.table, "name", "") + if is_gin: + # GIN-Index auf Ausdruck – RAW SQL + sql = f"CREATE INDEX {index.name} ON {table_name} USING gin ({expr_str} gin_trgm_ops);" + else: + # Sonstige Index-Typen auf Ausdruck + sql = f"CREATE INDEX {index.name} ON {table_name} ({expr_str});" + RAW_SQL_INDEXES.append(sql) + # Niemals als Python-Index-Objekt zurückgeben return "" + # Falls alles schiefgeht: + return "" - kwargs: dict[str, Any] = {} - if index.unique: - kwargs["unique"] = True - - if "postgresql" in index.dialect_options: - dialect_opts = index.dialect_options["postgresql"] - if "using" in dialect_opts: - using = dialect_opts["using"] - kwargs["postgresql_using"] = ( - f"'{using}'" if isinstance(using, str) else using - ) - - if opclass_map: - kwargs["postgresql_ops"] = opclass_map - - return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) TablesGenerator.render_index = clx_render_index # type: ignore[method-assign] @@ -1238,6 +1297,12 @@ def get_extension_object_names( all.append(model.name) rendered.append(self.render_class(model)) + # Am Ende des Generators (nach dem Rendern): + if RAW_SQL_INDEXES: + print("# Folgende Indizes müssen manuell (per Migration/SQL) erstellt werden:") + for sql in RAW_SQL_INDEXES: + print(sql) + elif table is not None: rendered.append(f"{model.name} = {self.render_table(model.table)}") From 3a064f825a83b2767f16590e75c13ba924d34270 Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Thu, 10 Jul 2025 05:56:21 +0200 Subject: [PATCH 02/11] feat: added pg_publication and pg_index --- src/sqlacodegen/cli.py | 18 ++ src/sqlacodegen/risclog_generators.py | 250 ++++++++++++++------------ 2 files changed, 158 insertions(+), 110 deletions(-) diff --git a/src/sqlacodegen/cli.py b/src/sqlacodegen/cli.py index 7de594a4..6a59a92a 100644 --- a/src/sqlacodegen/cli.py +++ b/src/sqlacodegen/cli.py @@ -30,7 +30,9 @@ parse_aggregate_row, parse_extension_row, parse_function_row, + parse_index_row, parse_policy_row, + parse_publication_row, parse_trigger_row, ) from sqlacodegen.seed_export import export_pgdata_py, get_table_dependency_order @@ -245,6 +247,22 @@ class ExportDict(TypedDict, total=False): "parse_row_func": parse_extension_row, "file": "pg_extensions.py", }, + { + "title": "Publications", + "entities_varname": "all_publications", + "template": "ALEMBIC_PUBLICATION_TEMPLATE", + "statement": "ALEMBIC_PUBLICATION_STATEMENT", + "parse_row_func": parse_publication_row, + "file": "pg_publications.py", + }, + { + "title": "Indexes", + "entities_varname": "all_indexes", + "template": "ALEMBIC_INDEX_TEMPLATE", + "statement": "ALEMBIC_INDEX_STATEMENT", + "parse_row_func": parse_index_row, + "file": "pg_indexes.py", + }, ] # ----------- Export-Loop ------------ diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 119b3208..477fe786 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -104,6 +104,66 @@ class {classname}(PortalObject): # type: ignore[misc] definition=\"\"\"{definition}\"\"\", ) """ +ALEMBIC_PUBLICATION_TEMPLATE = """{varname} = PGPublication( + name={name!r}, + tables={tables!r}, + publish={publish!r}, +) +""" +ALEMBIC_INDEX_TEMPLATE = """{varname} = PGIndex( + name={name!r}, + table={table!r}, + columns={columns!r}, + using={using!r}, + opclass={opclass!r}, + unique={unique!r}, +) +""" +ALEMBIC_INDEX_STATEMENT = """ +SELECT + ix.indexrelid::regclass::text AS name, + t.relname AS tablename, + a.amname AS using, + ix.indisunique AS unique, + array_agg(pg_get_indexdef(ix.indexrelid, k + 1, TRUE)) AS columns, + array_agg( + CASE WHEN op.opcname IS NOT NULL THEN op.opcname ELSE NULL END + ) AS opclass +FROM + pg_index ix + JOIN pg_class t ON t.oid = ix.indrelid + JOIN pg_class i ON i.oid = ix.indexrelid + JOIN pg_am a ON a.oid = i.relam + LEFT JOIN LATERAL ( + SELECT unnest(ix.indkey) AS attnum, generate_subscripts(ix.indkey, 1) - 1 AS k + ) x ON TRUE + LEFT JOIN pg_attribute c ON c.attrelid = t.oid AND c.attnum = x.attnum + LEFT JOIN pg_opclass op ON op.oid = ANY(ix.indclass) +WHERE + NOT ix.indisprimary + AND t.relkind = 'r' + AND t.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public') +GROUP BY ix.indexrelid, t.relname, a.amname, ix.indisunique +ORDER BY name; +""" + +ALEMBIC_PUBLICATION_STATEMENT = """ +SELECT + pub.pubname AS name, + array_agg(nsp.nspname || '.' || tbl.relname) AS tables, + pub.pubinsert AS publish_insert, + pub.pubupdate AS publish_update, + pub.pubdelete AS publish_delete, + pub.pubtruncate AS publish_truncate +FROM + pg_publication pub + JOIN pg_publication_rel pr ON pub.oid = pr.prpubid + JOIN pg_class tbl ON pr.prrelid = tbl.oid + JOIN pg_namespace nsp ON tbl.relnamespace = nsp.oid +GROUP BY + pub.pubname, pub.pubinsert, pub.pubupdate, pub.pubdelete, pub.pubtruncate +ORDER BY pub.pubname; +""" ALEMBIC_FUNCTION_STATEMENT = """SELECT pg_get_functiondef(p.oid) AS func @@ -259,6 +319,8 @@ def finalize_alembic_utils( "all_sequences": "from alembic_utils.pg_sequence import PGSequence", "all_extensions": "from alembic_utils.pg_extension import PGExtension", "all_aggregates": "from alembic_utils.pg_aggregate import PGAggregate", + "all_publications": "from alembic_utils.pg_publication import PGPublication", + "all_indexes": "from alembic_utils.pg_index import PGIndex", } import_stmt = imports.get( entities_name or "all_views", @@ -271,6 +333,56 @@ def finalize_alembic_utils( return pg_alembic_definition +def parse_index_row( + row: dict[str, Any], template_def: str, schema: str | None = None +) -> tuple[str, str | Any]: + name = row["name"] + table = row["tablename"] + columns = row["columns"] + using = row["using"] if row.get("using") else None + unique = bool(row.get("unique", False)) + opclass_list = [opc for opc in row.get("opclass", []) if opc] + opclass = opclass_list[0] if opclass_list else None + + varname = name.lower() + code = template_def.format( + varname=varname, + name=name, + table=table, + columns=columns, + using=using, + opclass=opclass, + unique=unique, + ) + return code, varname + + +def parse_publication_row( + row: dict[str, Any], template_def: str, schema: str | None = None +) -> tuple[str, str | Any]: + name = row["name"] + tables = row["tables"] + pub_ops = [] + if row.get("publish_insert"): + pub_ops.append("insert") + if row.get("publish_update"): + pub_ops.append("update") + if row.get("publish_delete"): + pub_ops.append("delete") + if row.get("publish_truncate"): + pub_ops.append("truncate") + publish = ", ".join(pub_ops) or "insert, update, delete" + + varname = name.lower() + code = template_def.format( + varname=varname, + name=name, + tables=tables, + publish=publish, + ) + return code, varname + + def parse_function_row( row: dict[str, Any], template_def: str, schema: str | None ) -> tuple[str, str | Any]: @@ -297,12 +409,15 @@ def parse_function_row( schema = schema or "public" name = name.lower() - return template_def.format( - varname=name, - schema=schema, - signature=signature, - definition=unescape_sql_string(squash_whitespace(definition)), - ), name + return ( + template_def.format( + varname=name, + schema=schema, + signature=signature, + definition=unescape_sql_string(squash_whitespace(definition)), + ), + name, + ) def parse_policy_row( @@ -449,9 +564,11 @@ def parse_sequence_row( f"MINVALUE {row['minimum_value']}", f"MAXVALUE {row['maximum_value']}", f"CACHE {row['cache_size']}", - "CYCLE" - if str(row.get("cycle", "")).lower() in ("yes", "true", "on", "1") - else "NO CYCLE", + ( + "CYCLE" + if str(row.get("cycle", "")).lower() in ("yes", "true", "on", "1") + else "NO CYCLE" + ), ] definition = "\n ".join(parts) @@ -642,125 +759,39 @@ def clx_generate_base(self: "TablesGenerator") -> None: TablesGenerator.generate_base = clx_generate_base # type: ignore[method-assign] -# def clx_render_index(self: "TablesGenerator", index: Index) -> str: -# elements = [] -# opclass_map = {} - -# if index.columns: -# for col in index.columns: -# elements.append(repr(col.name)) - -# if ( -# "postgresql" in index.dialect_options -# and index.dialect_options["postgresql"].get("using") == "gin" -# and hasattr(col, "type") -# ): -# coltype = getattr(col.type, "python_type", None) -# if isinstance( -# col.type, (satypes.String, satypes.Text, satypes.Unicode) -# ) or (coltype and coltype is str): -# opclass_map[col.name] = "gin_trgm_ops" - -# elif getattr(index, "expressions", None): -# for expr in index.expressions: -# expr_str = str(expr).strip() -# elements.append(f"text({expr_str!r})") - -# if ( -# "postgresql" in index.dialect_options -# and index.dialect_options["postgresql"].get("using") == "gin" -# ): -# if ( -# "::tsvector" not in expr_str -# and "array" not in expr_str.lower() -# and "json" not in expr_str.lower() -# ): -# opclass_map[expr_str] = "gin_trgm_ops" - -# if not elements: -# print( -# f"# WARNING: Skipped index {getattr(index, 'name', None)!r} on table {getattr(index.table, 'name', None)!r} (no columns or expressions)." -# ) -# return "" - -# kwargs: dict[str, Any] = {} - -# if index.unique: -# kwargs["unique"] = True - -# if "postgresql" in index.dialect_options: -# dialect_opts = index.dialect_options["postgresql"] -# if "using" in dialect_opts: -# using = dialect_opts["using"] -# kwargs["postgresql_using"] = ( -# f"'{using}'" if isinstance(using, str) else using -# ) - -# if opclass_map: -# kwargs["postgresql_ops"] = opclass_map - -# return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) - -RAW_SQL_INDEXES = [] - def clx_render_index(self: "TablesGenerator", index: Index) -> str: - """ - Render SQLAlchemy Index for ORM if possible, otherwise collect as RAW SQL. - """ - # Normale Spaltenindizes if index.columns and all(hasattr(col, "name") for col in index.columns): opclass_map = {} elements = [repr(col.name) for col in index.columns] - # GIN + Operator für Textspalten if ( "postgresql" in index.dialect_options and index.dialect_options["postgresql"].get("using") == "gin" ): for col in index.columns: coltype = getattr(col.type, "python_type", None) - if ( - isinstance(col.type, (satypes.String, satypes.Text, satypes.Unicode)) - or (coltype and coltype is str) - ): + if isinstance( + col.type, (satypes.String, satypes.Text, satypes.Unicode) + ) or (coltype and coltype is str): opclass_map[col.name] = "gin_trgm_ops" - kwargs = {} + kwargs: dict[str, object] = {} if index.unique: kwargs["unique"] = True if "postgresql" in index.dialect_options: using = index.dialect_options["postgresql"].get("using") if using: - kwargs["postgresql_using"] = f"'{using}'" + kwargs["postgresql_using"] = using if opclass_map: kwargs["postgresql_ops"] = opclass_map return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) - # Ausdrücke im Index? (text/func etc.) expressions = getattr(index, "expressions", None) if expressions: - is_gin = ( - "postgresql" in index.dialect_options - and index.dialect_options["postgresql"].get("using") == "gin" - ) - for expr in expressions: - expr_str = str(expr).strip() - table_name = getattr(index.table, "name", "") - if is_gin: - # GIN-Index auf Ausdruck – RAW SQL - sql = f"CREATE INDEX {index.name} ON {table_name} USING gin ({expr_str} gin_trgm_ops);" - else: - # Sonstige Index-Typen auf Ausdruck - sql = f"CREATE INDEX {index.name} ON {table_name} ({expr_str});" - RAW_SQL_INDEXES.append(sql) - # Niemals als Python-Index-Objekt zurückgeben return "" - # Falls alles schiefgeht: return "" - - TablesGenerator.render_index = clx_render_index # type: ignore[method-assign] @@ -1203,7 +1234,8 @@ def get_extension_object_names( extension_objs = set() for schema in schemas: result = conn.execute( - text(""" + text( + """ SELECT c.relname FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid @@ -1214,7 +1246,8 @@ def get_extension_object_names( JOIN pg_extension e ON d.refobjid = e.oid WHERE d.objid = c.oid AND d.deptype = 'e' ) - """), + """ + ), {"schema": schema}, ) extension_objs |= {row[0] for row in result} @@ -1297,11 +1330,6 @@ def get_extension_object_names( all.append(model.name) rendered.append(self.render_class(model)) - # Am Ende des Generators (nach dem Rendern): - if RAW_SQL_INDEXES: - print("# Folgende Indizes müssen manuell (per Migration/SQL) erstellt werden:") - for sql in RAW_SQL_INDEXES: - print(sql) elif table is not None: rendered.append(f"{model.name} = {self.render_table(model.table)}") @@ -1315,6 +1343,8 @@ def get_extension_object_names( self.add_literal_import("sqlalchemy", "text") self.add_literal_import("sqlalchemy", "FetchedValue") - return "\n\n".join(rendered), finalize_alembic_utils( - pg_alembic_definition, entities, entities_name - ) if pg_alembic_definition else None + return "\n\n".join(rendered), ( + finalize_alembic_utils(pg_alembic_definition, entities, entities_name) + if pg_alembic_definition + else None + ) From 19407dd30382245144caad79833bb5c23b4bfb8f Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Thu, 10 Jul 2025 06:15:30 +0200 Subject: [PATCH 03/11] fix: remove special indexes in orm tables --- src/sqlacodegen/risclog_generators.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 477fe786..13a066a2 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -795,6 +795,18 @@ def clx_render_index(self: "TablesGenerator", index: Index) -> str: TablesGenerator.render_index = clx_render_index # type: ignore[method-assign] +def is_special_index(index: Index) -> bool: + if "postgresql" in index.dialect_options: + using = index.dialect_options["postgresql"].get("using") + if using and using != "btree": + return True + if index.dialect_options["postgresql"].get("ops"): + return True + if getattr(index, "expressions", None): + return True + return False + + def clx_render_table(self: "TablesGenerator", table: Table) -> str: args: list[str] = [f"{table.name!r}, {self.base.metadata_ref}"] kwargs: dict[str, object] = {} @@ -810,7 +822,10 @@ def clx_render_table(self: "TablesGenerator", table: Table) -> str: continue args.append(self.render_constraint(constraint)) + for index in sorted(table.indexes, key=lambda i: str(i.name or "")): + if is_special_index(index): + continue if len(index.columns) > 1 or not uses_default_name(index): idx_code = self.render_index(index) if idx_code.strip() and idx_code is not None: @@ -1162,6 +1177,8 @@ def render_table_args(self, table: Table) -> str: args.append(self.render_constraint(constraint)) for index in sorted(table.indexes, key=lambda i: str(i.name or "")): + if is_special_index(index): + continue if len(index.columns) > 1 or not uses_default_name(index): idx_code = self.render_index(index) if idx_code.strip() and idx_code is not None: From e7014f15e04d1af03d01d0b31dacaed616789efe Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Thu, 10 Jul 2025 06:15:54 +0200 Subject: [PATCH 04/11] chore: pre commit --- src/sqlacodegen/risclog_generators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 13a066a2..85193310 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -822,7 +822,6 @@ def clx_render_table(self: "TablesGenerator", table: Table) -> str: continue args.append(self.render_constraint(constraint)) - for index in sorted(table.indexes, key=lambda i: str(i.name or "")): if is_special_index(index): continue From ed707f8deb7b335111adddeb35ac87bbcb522d6f Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Thu, 10 Jul 2025 14:07:22 +0200 Subject: [PATCH 05/11] fix: pg_indexes --- src/sqlacodegen/risclog_generators.py | 342 ++++++++++++++------------ 1 file changed, 186 insertions(+), 156 deletions(-) diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 85193310..e480cd71 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -1,6 +1,6 @@ import re from pprint import pformat -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, Callable, Optional, cast from sqlalchemy import ( Column, @@ -110,61 +110,23 @@ class {classname}(PortalObject): # type: ignore[misc] publish={publish!r}, ) """ -ALEMBIC_INDEX_TEMPLATE = """{varname} = PGIndex( - name={name!r}, - table={table!r}, - columns={columns!r}, - using={using!r}, - opclass={opclass!r}, - unique={unique!r}, -) -""" -ALEMBIC_INDEX_STATEMENT = """ -SELECT - ix.indexrelid::regclass::text AS name, - t.relname AS tablename, - a.amname AS using, - ix.indisunique AS unique, - array_agg(pg_get_indexdef(ix.indexrelid, k + 1, TRUE)) AS columns, - array_agg( - CASE WHEN op.opcname IS NOT NULL THEN op.opcname ELSE NULL END - ) AS opclass -FROM - pg_index ix - JOIN pg_class t ON t.oid = ix.indrelid - JOIN pg_class i ON i.oid = ix.indexrelid - JOIN pg_am a ON a.oid = i.relam - LEFT JOIN LATERAL ( - SELECT unnest(ix.indkey) AS attnum, generate_subscripts(ix.indkey, 1) - 1 AS k - ) x ON TRUE - LEFT JOIN pg_attribute c ON c.attrelid = t.oid AND c.attnum = x.attnum - LEFT JOIN pg_opclass op ON op.oid = ANY(ix.indclass) -WHERE - NOT ix.indisprimary - AND t.relkind = 'r' - AND t.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public') -GROUP BY ix.indexrelid, t.relname, a.amname, ix.indisunique -ORDER BY name; -""" - ALEMBIC_PUBLICATION_STATEMENT = """ SELECT - pub.pubname AS name, - array_agg(nsp.nspname || '.' || tbl.relname) AS tables, - pub.pubinsert AS publish_insert, - pub.pubupdate AS publish_update, - pub.pubdelete AS publish_delete, - pub.pubtruncate AS publish_truncate + p.pubname, + array_remove(array_agg(pt.relname), NULL) as tables, + ( + CASE WHEN p.pubinsert THEN 'insert' ELSE '' END || + CASE WHEN p.pubupdate THEN ', update' ELSE '' END || + CASE WHEN p.pubdelete THEN ', delete' ELSE '' END || + CASE WHEN p.pubtruncate THEN ', truncate' ELSE '' + END + ) as publish FROM - pg_publication pub - JOIN pg_publication_rel pr ON pub.oid = pr.prpubid - JOIN pg_class tbl ON pr.prrelid = tbl.oid - JOIN pg_namespace nsp ON tbl.relnamespace = nsp.oid -GROUP BY - pub.pubname, pub.pubinsert, pub.pubupdate, pub.pubdelete, pub.pubtruncate -ORDER BY pub.pubname; + pg_publication p + LEFT JOIN pg_publication_rel pr ON pr.prpubid = p.oid + LEFT JOIN pg_class pt ON pt.oid = pr.prrelid +GROUP BY p.pubname, p.pubinsert, p.pubupdate, p.pubdelete, p.pubtruncate """ - ALEMBIC_FUNCTION_STATEMENT = """SELECT pg_get_functiondef(p.oid) AS func FROM @@ -186,7 +148,6 @@ class {classname}(PortalObject): # type: ignore[misc] p.proname; """ - ALEMBIC_POLICIES_STATEMENT = """SELECT pol.polname AS policy_name, ns.nspname AS schema_name, @@ -306,6 +267,108 @@ class {classname}(PortalObject): # type: ignore[misc] """ +ALEMBIC_INDEX_STATEMENT = """ +SELECT + c.relname AS index_name, + t.relname AS table_name, + n.nspname AS schema_name, + pg_get_indexdef(c.oid) AS definition +FROM + pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_class t ON t.oid = i.indrelid +WHERE + c.relkind = 'i' + AND n.nspname = :schema +ORDER BY + index_name; +""" +ALEMBIC_INDEX_TEMPLATE = """{varname} = PGIndex( + schema={schema!r}, + signature={signature!r}, + definition=\"\"\"{definition}\"\"\", + table={table!r}, + index_name={index_name!r}, +) +""" +SPECIAL_INDEX_PATTERNS = [ + # 1. Nicht-btree USING-Klausel + r"\bUSING\s+(?!btree\b)[a-zA-Z_]+", + # 2. Funktions- oder Expressions-Index (z.B. upper(...), lower(...), coalesce(...), etc.) + r"\((?:\s*[a-zA-Z_]+\s*\([^)]+\))", + # 3. Operator Class (z.B. gin_trgm_ops) + r"(\bgin_trgm_ops\b|\bgist_trgm_ops\b|\bhash_ops\b|\bjsonb_path_ops\b|\btext_pattern_ops\b)", + # 4. OPCLASS oder OPS explizit (SQLAlchemy kennt keine dialektübergreifende Syntax dafür) + r"ops\s*=", + r"opclass\s*=", + # 5. WITH (...) Storage-Parameter (Postgres) + r"\bWITH\s*\([^)]+\)", + # 6. WHERE-Klausel (Partial Index) + r"\bWHERE\b", + # 7. ASC/DESC/NULLS FIRST/LAST an einzelnen Indexspalten (selten in ORM gepflegt) + r"\bASC\b|\bDESC\b|\bNULLS\s+(FIRST|LAST)\b", +] + + +def is_special_index_definition( + definition: str, extra_patterns: Optional[list[str]] = None +) -> bool: + patterns = SPECIAL_INDEX_PATTERNS + (extra_patterns or []) + for pat in patterns: + if re.search(pat, definition, re.IGNORECASE): + return True + return False + + +def parse_index_row( + row: dict[str, str], template_def: str, schema: str | None +) -> tuple[str, str] | None: + definition = row["definition"] + if not is_special_index_definition(definition): + return None # Nur spezielle Indizes ausgeben! + + index_name = row["index_name"] + table_name = row["table_name"] + schema_name = row.get("schema_name", schema) or "public" + + varname = f"{index_name}_{table_name}".lower() + signature = f"{index_name} ON {table_name}" + + code = template_def.format( + varname=varname, + schema=schema_name, + signature=signature, + definition=definition, + table=table_name, + index_name=index_name, + ) + return code, varname + + +def parse_publication_row( + row: dict[str, Any], + template_def: str, + schema: str | None, +) -> tuple[str, str] | None: + name = row.get("pubname") + tables = row.get("tables") or [] + if isinstance(tables, str): + tables = [t.strip() for t in tables.split(",") if t.strip()] + publish = row.get("publish") or "" + owner = row.get("owner") or None + + varname = f"{name}".lower() + code = template_def.format( + varname=varname, + name=name, + tables=tables, + publish=publish, + owner=owner, + ) + return code, varname + + def finalize_alembic_utils( pg_alembic_definition: list[str], entities: list[str], @@ -319,8 +382,8 @@ def finalize_alembic_utils( "all_sequences": "from alembic_utils.pg_sequence import PGSequence", "all_extensions": "from alembic_utils.pg_extension import PGExtension", "all_aggregates": "from alembic_utils.pg_aggregate import PGAggregate", - "all_publications": "from alembic_utils.pg_publication import PGPublication", - "all_indexes": "from alembic_utils.pg_index import PGIndex", + "all_publications": "from risclog.claimxdb.alembic.object_ops import PGPublication", + "all_indices": "from risclog.claimxdb.alembic.object_ops import PGIndex", } import_stmt = imports.get( entities_name or "all_views", @@ -333,56 +396,6 @@ def finalize_alembic_utils( return pg_alembic_definition -def parse_index_row( - row: dict[str, Any], template_def: str, schema: str | None = None -) -> tuple[str, str | Any]: - name = row["name"] - table = row["tablename"] - columns = row["columns"] - using = row["using"] if row.get("using") else None - unique = bool(row.get("unique", False)) - opclass_list = [opc for opc in row.get("opclass", []) if opc] - opclass = opclass_list[0] if opclass_list else None - - varname = name.lower() - code = template_def.format( - varname=varname, - name=name, - table=table, - columns=columns, - using=using, - opclass=opclass, - unique=unique, - ) - return code, varname - - -def parse_publication_row( - row: dict[str, Any], template_def: str, schema: str | None = None -) -> tuple[str, str | Any]: - name = row["name"] - tables = row["tables"] - pub_ops = [] - if row.get("publish_insert"): - pub_ops.append("insert") - if row.get("publish_update"): - pub_ops.append("update") - if row.get("publish_delete"): - pub_ops.append("delete") - if row.get("publish_truncate"): - pub_ops.append("truncate") - publish = ", ".join(pub_ops) or "insert, update, delete" - - varname = name.lower() - code = template_def.format( - varname=varname, - name=name, - tables=tables, - publish=publish, - ) - return code, varname - - def parse_function_row( row: dict[str, Any], template_def: str, schema: str | None ) -> tuple[str, str | Any]: @@ -509,7 +522,6 @@ def parse_aggregate_row( initcond = row.get("initcond") schema_val = schema or row.get("schema") or "public" - # Baue die Definition als lesbare String-Config: definition_parts = [] if sfunc: definition_parts.append(f"SFUNC = {sfunc}") @@ -759,52 +771,78 @@ def clx_generate_base(self: "TablesGenerator") -> None: TablesGenerator.generate_base = clx_generate_base # type: ignore[method-assign] +def is_special_index(index: Index) -> bool: + from sqlalchemy.sql.elements import TextClause + + if any(isinstance(expr, TextClause) for expr in getattr(index, "expressions", [])): + return True + opts = getattr(index, "dialect_options", {}).get("postgresql", {}) + if opts.get("using") or opts.get("ops"): + return True + return False + + def clx_render_index(self: "TablesGenerator", index: Index) -> str: - if index.columns and all(hasattr(col, "name") for col in index.columns): - opclass_map = {} - elements = [repr(col.name) for col in index.columns] - if ( - "postgresql" in index.dialect_options - and index.dialect_options["postgresql"].get("using") == "gin" - ): - for col in index.columns: + elements = [] + opclass_map = {} + + if index.columns: + for col in index.columns: + elements.append(repr(col.name)) + + if ( + "postgresql" in index.dialect_options + and index.dialect_options["postgresql"].get("using") == "gin" + and hasattr(col, "type") + ): coltype = getattr(col.type, "python_type", None) if isinstance( col.type, (satypes.String, satypes.Text, satypes.Unicode) ) or (coltype and coltype is str): opclass_map[col.name] = "gin_trgm_ops" - kwargs: dict[str, object] = {} - if index.unique: - kwargs["unique"] = True - if "postgresql" in index.dialect_options: - using = index.dialect_options["postgresql"].get("using") - if using: - kwargs["postgresql_using"] = using - if opclass_map: - kwargs["postgresql_ops"] = opclass_map + elif getattr(index, "expressions", None): + for expr in index.expressions: + expr_str = str(expr).strip() + elements.append(f"text({expr_str!r})") - return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) + if ( + "postgresql" in index.dialect_options + and index.dialect_options["postgresql"].get("using") == "gin" + ): + if ( + "::tsvector" not in expr_str + and "array" not in expr_str.lower() + and "json" not in expr_str.lower() + ): + opclass_map[expr_str] = "gin_trgm_ops" - expressions = getattr(index, "expressions", None) - if expressions: + if not elements: + print( + f"# WARNING: Skipped index {getattr(index, 'name', None)!r} on table {getattr(index.table, 'name', None)!r} (no columns or expressions)." + ) return "" - return "" - -TablesGenerator.render_index = clx_render_index # type: ignore[method-assign] + kwargs: dict[str, Any] = {} + if index.unique: + kwargs["unique"] = True -def is_special_index(index: Index) -> bool: if "postgresql" in index.dialect_options: - using = index.dialect_options["postgresql"].get("using") - if using and using != "btree": - return True - if index.dialect_options["postgresql"].get("ops"): - return True - if getattr(index, "expressions", None): - return True - return False + dialect_opts = index.dialect_options["postgresql"] + if "using" in dialect_opts: + using = dialect_opts["using"] + kwargs["postgresql_using"] = ( + f"'{using}'" if isinstance(using, str) else using + ) + + if opclass_map: + kwargs["postgresql_ops"] = opclass_map + + return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) + + +TablesGenerator.render_index = clx_render_index # type: ignore[method-assign] def clx_render_table(self: "TablesGenerator", table: Table) -> str: @@ -822,9 +860,9 @@ def clx_render_table(self: "TablesGenerator", table: Table) -> str: continue args.append(self.render_constraint(constraint)) - for index in sorted(table.indexes, key=lambda i: str(i.name or "")): - if is_special_index(index): - continue + normal_indices = [idx for idx in table.indexes if not is_special_index(idx)] + for index in sorted(normal_indices, key=lambda i: str(i.name or "")): + # for index in sorted(table.indexes, key=lambda i: str(i.name or "")): if len(index.columns) > 1 or not uses_default_name(index): idx_code = self.render_index(index) if idx_code.strip() and idx_code is not None: @@ -1015,7 +1053,6 @@ def get_table_managed_sequences(metadata: MetaData) -> set[str]: for column in table.columns: default = getattr(column, "default", None) if default is not None: - # Sequence kann als Default o. direkt als ServerDefault hinterlegt sein if hasattr(default, "name"): seq_names.add(default.name) if hasattr(column, "sequence") and column.sequence is not None: @@ -1039,10 +1076,8 @@ def generate_alembic_utils_sequences( sql = globals()[statement] template_def = globals()[template] - - # Hole alle aus DB result: list[dict[str, Any]] = fetch_all_mappings(conn, sql, {"schema": schema}) - # Finde alle, die von Tables verwaltet werden + entities = [ parsed for row in result @@ -1174,10 +1209,9 @@ def render_table_args(self, table: Table) -> str: ): continue args.append(self.render_constraint(constraint)) - - for index in sorted(table.indexes, key=lambda i: str(i.name or "")): - if is_special_index(index): - continue + normal_indices = [idx for idx in table.indexes if not is_special_index(idx)] + for index in sorted(normal_indices, key=lambda i: str(i.name or "")): + # for index in sorted(table.indexes, key=lambda i: str(i.name or "")): if len(index.columns) > 1 or not uses_default_name(index): idx_code = self.render_index(index) if idx_code.strip() and idx_code is not None: @@ -1243,7 +1277,6 @@ def render_models(self, models: list[Model]) -> tuple[str, list[str] | None]: # "ARRAY": ("sqlalchemy", "ARRAY"), } - # Ergänzung: Ermittlung aller Extension-Objekte (Tabellen, Views, etc.) def get_extension_object_names( conn: Connection, schemas: set[str | None] ) -> Any: @@ -1269,7 +1302,6 @@ def get_extension_object_names( extension_objs |= {row[0] for row in result} return extension_objs - # Hole nur einmal die Extension-Objekte aus der DB conn = self.bind.connect() if hasattr(self.bind, "connect") else self.bind EXTENSION_OBJECTS = get_extension_object_names(conn, schemas) @@ -1314,13 +1346,12 @@ def get_extension_object_names( schema = table.schema schema_views = views_by_schema.get(schema, set()) - # **Hier: Filter für System- und Extension-Objekte** if table.schema and table.schema.startswith("pg_"): - continue # Skip Postgres System-Views + continue if table.name.startswith("pg_"): - continue # Skip system views + continue if table.name in EXTENSION_OBJECTS: - continue # Skip Extension-Objekte + continue for col in table.columns: sa_type = sa_type_from_column(col) @@ -1346,7 +1377,6 @@ def get_extension_object_names( all.append(model.name) rendered.append(self.render_class(model)) - elif table is not None: rendered.append(f"{model.name} = {self.render_table(model.table)}") From e46544c21e67134addee62ec5cdc84c645a70ebe Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Fri, 11 Jul 2025 08:08:22 +0200 Subject: [PATCH 06/11] snapshot --- src/sqlacodegen/risclog_generators.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index e480cd71..832f857b 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -272,17 +272,21 @@ class {classname}(PortalObject): # type: ignore[misc] c.relname AS index_name, t.relname AS table_name, n.nspname AS schema_name, - pg_get_indexdef(c.oid) AS definition + pg_get_indexdef(c.oid) AS definition, + (con.oid IS NOT NULL) AS is_constraint, + con.contype FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace JOIN pg_index i ON i.indexrelid = c.oid JOIN pg_class t ON t.oid = i.indrelid + LEFT JOIN pg_constraint con ON con.conindid = c.oid WHERE c.relkind = 'i' AND n.nspname = :schema ORDER BY - index_name; + index_name + """ ALEMBIC_INDEX_TEMPLATE = """{varname} = PGIndex( schema={schema!r}, @@ -290,8 +294,10 @@ class {classname}(PortalObject): # type: ignore[misc] definition=\"\"\"{definition}\"\"\", table={table!r}, index_name={index_name!r}, + is_constraint={is_constraint!r}, ) """ + SPECIAL_INDEX_PATTERNS = [ # 1. Nicht-btree USING-Klausel r"\bUSING\s+(?!btree\b)[a-zA-Z_]+", @@ -324,6 +330,12 @@ def is_special_index_definition( def parse_index_row( row: dict[str, str], template_def: str, schema: str | None ) -> tuple[str, str] | None: + contype = row.get("contype") + # NUR echte Indexe, KEINE PK/UNIQUE-Constraint-indizes + if contype in ("p", "u", "x"): # x = exclude + print(f"SKIP CONSTRAINT INDEX: {row['index_name']} on {row['table_name']} (contype={contype})") + return None + definition = row["definition"] if not is_special_index_definition(definition): return None # Nur spezielle Indizes ausgeben! @@ -331,7 +343,8 @@ def parse_index_row( index_name = row["index_name"] table_name = row["table_name"] schema_name = row.get("schema_name", schema) or "public" - + is_constraint = row.get("is_constraint", False) + #print("######### DEBUG: Parsing index row #########",is_constraint) varname = f"{index_name}_{table_name}".lower() signature = f"{index_name} ON {table_name}" @@ -342,6 +355,7 @@ def parse_index_row( definition=definition, table=table_name, index_name=index_name, + is_constraint=is_constraint, ) return code, varname @@ -383,7 +397,7 @@ def finalize_alembic_utils( "all_extensions": "from alembic_utils.pg_extension import PGExtension", "all_aggregates": "from alembic_utils.pg_aggregate import PGAggregate", "all_publications": "from risclog.claimxdb.alembic.object_ops import PGPublication", - "all_indices": "from risclog.claimxdb.alembic.object_ops import PGIndex", + "all_indexes": "from risclog.claimxdb.alembic.object_ops import PGIndex", } import_stmt = imports.get( entities_name or "all_views", From 33971836122a70167806ab6fdcc35faa5f09dfee Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Fri, 11 Jul 2025 09:39:15 +0200 Subject: [PATCH 07/11] Fix SQLAlchemy model and index generation for correct table/index DDL - Fixed incorrect generation of Index objects using qualified column names or Text() objects, which led to broken SQL DDL and database errors. - Ensured all Index columns are created with unqualified column names and as actual Column objects, not Text() expressions, resolving issues with CREATE INDEX syntax. - Added logic to correctly handle special Postgres index operator classes (e.g., gin_trgm_ops) when needed, ensuring compatibility with extensions like pg_trgm. - Refactored index handling to avoid assigning to read-only properties (e.g., Index.columns), instead removing and recreating indexes as required. - Improved generator output to set table schema and comments where necessary. - General cleanup and clarification of index and table argument rendering. Fixes issues with model-generated DDL not matching the database schema and errors when running Base.metadata.create_all(). --- src/sqlacodegen/cli.py | 9 -- src/sqlacodegen/risclog_generators.py | 211 ++++++++------------------ 2 files changed, 62 insertions(+), 158 deletions(-) diff --git a/src/sqlacodegen/cli.py b/src/sqlacodegen/cli.py index 6a59a92a..0479e041 100644 --- a/src/sqlacodegen/cli.py +++ b/src/sqlacodegen/cli.py @@ -30,7 +30,6 @@ parse_aggregate_row, parse_extension_row, parse_function_row, - parse_index_row, parse_policy_row, parse_publication_row, parse_trigger_row, @@ -255,14 +254,6 @@ class ExportDict(TypedDict, total=False): "parse_row_func": parse_publication_row, "file": "pg_publications.py", }, - { - "title": "Indexes", - "entities_varname": "all_indexes", - "template": "ALEMBIC_INDEX_TEMPLATE", - "statement": "ALEMBIC_INDEX_STATEMENT", - "parse_row_func": parse_index_row, - "file": "pg_indexes.py", - }, ] # ----------- Export-Loop ------------ diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 832f857b..70e24270 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -1,6 +1,6 @@ import re from pprint import pformat -from typing import TYPE_CHECKING, Any, Callable, Optional, cast +from typing import TYPE_CHECKING, Any, Callable, cast from sqlalchemy import ( Column, @@ -267,99 +267,6 @@ class {classname}(PortalObject): # type: ignore[misc] """ -ALEMBIC_INDEX_STATEMENT = """ -SELECT - c.relname AS index_name, - t.relname AS table_name, - n.nspname AS schema_name, - pg_get_indexdef(c.oid) AS definition, - (con.oid IS NOT NULL) AS is_constraint, - con.contype -FROM - pg_class c - JOIN pg_namespace n ON n.oid = c.relnamespace - JOIN pg_index i ON i.indexrelid = c.oid - JOIN pg_class t ON t.oid = i.indrelid - LEFT JOIN pg_constraint con ON con.conindid = c.oid -WHERE - c.relkind = 'i' - AND n.nspname = :schema -ORDER BY - index_name - -""" -ALEMBIC_INDEX_TEMPLATE = """{varname} = PGIndex( - schema={schema!r}, - signature={signature!r}, - definition=\"\"\"{definition}\"\"\", - table={table!r}, - index_name={index_name!r}, - is_constraint={is_constraint!r}, -) -""" - -SPECIAL_INDEX_PATTERNS = [ - # 1. Nicht-btree USING-Klausel - r"\bUSING\s+(?!btree\b)[a-zA-Z_]+", - # 2. Funktions- oder Expressions-Index (z.B. upper(...), lower(...), coalesce(...), etc.) - r"\((?:\s*[a-zA-Z_]+\s*\([^)]+\))", - # 3. Operator Class (z.B. gin_trgm_ops) - r"(\bgin_trgm_ops\b|\bgist_trgm_ops\b|\bhash_ops\b|\bjsonb_path_ops\b|\btext_pattern_ops\b)", - # 4. OPCLASS oder OPS explizit (SQLAlchemy kennt keine dialektübergreifende Syntax dafür) - r"ops\s*=", - r"opclass\s*=", - # 5. WITH (...) Storage-Parameter (Postgres) - r"\bWITH\s*\([^)]+\)", - # 6. WHERE-Klausel (Partial Index) - r"\bWHERE\b", - # 7. ASC/DESC/NULLS FIRST/LAST an einzelnen Indexspalten (selten in ORM gepflegt) - r"\bASC\b|\bDESC\b|\bNULLS\s+(FIRST|LAST)\b", -] - - -def is_special_index_definition( - definition: str, extra_patterns: Optional[list[str]] = None -) -> bool: - patterns = SPECIAL_INDEX_PATTERNS + (extra_patterns or []) - for pat in patterns: - if re.search(pat, definition, re.IGNORECASE): - return True - return False - - -def parse_index_row( - row: dict[str, str], template_def: str, schema: str | None -) -> tuple[str, str] | None: - contype = row.get("contype") - # NUR echte Indexe, KEINE PK/UNIQUE-Constraint-indizes - if contype in ("p", "u", "x"): # x = exclude - print(f"SKIP CONSTRAINT INDEX: {row['index_name']} on {row['table_name']} (contype={contype})") - return None - - definition = row["definition"] - if not is_special_index_definition(definition): - return None # Nur spezielle Indizes ausgeben! - - index_name = row["index_name"] - table_name = row["table_name"] - schema_name = row.get("schema_name", schema) or "public" - is_constraint = row.get("is_constraint", False) - #print("######### DEBUG: Parsing index row #########",is_constraint) - varname = f"{index_name}_{table_name}".lower() - signature = f"{index_name} ON {table_name}" - - code = template_def.format( - varname=varname, - schema=schema_name, - signature=signature, - definition=definition, - table=table_name, - index_name=index_name, - is_constraint=is_constraint, - ) - return code, varname - - def parse_publication_row( row: dict[str, Any], template_def: str, @@ -397,7 +304,6 @@ def finalize_alembic_utils( "all_extensions": "from alembic_utils.pg_extension import PGExtension", "all_aggregates": "from alembic_utils.pg_aggregate import PGAggregate", "all_publications": "from risclog.claimxdb.alembic.object_ops import PGPublication", - "all_indexes": "from risclog.claimxdb.alembic.object_ops import PGIndex", } import_stmt = imports.get( entities_name or "all_views", @@ -785,59 +691,60 @@ def clx_generate_base(self: "TablesGenerator") -> None: TablesGenerator.generate_base = clx_generate_base # type: ignore[method-assign] -def is_special_index(index: Index) -> bool: - from sqlalchemy.sql.elements import TextClause - - if any(isinstance(expr, TextClause) for expr in getattr(index, "expressions", [])): - return True - opts = getattr(index, "dialect_options", {}).get("postgresql", {}) - if opts.get("using") or opts.get("ops"): - return True - return False +def unqualify(colname: str) -> str: + if isinstance(colname, str): + return colname.split(".")[-1] + return str(colname) def clx_render_index(self: "TablesGenerator", index: Index) -> str: - elements = [] + from sqlalchemy.sql.elements import TextClause + + args = [repr(index.name)] + kwargs: dict[str, Any] = {} opclass_map = {} - if index.columns: + # --- Columns --- + if getattr(index, "columns", None) and len(index.columns) > 0: for col in index.columns: - elements.append(repr(col.name)) - + args.append(repr(unqualify(col.name))) + # Operator-Class GIN/TRGM if ( "postgresql" in index.dialect_options and index.dialect_options["postgresql"].get("using") == "gin" - and hasattr(col, "type") ): coltype = getattr(col.type, "python_type", None) if isinstance( col.type, (satypes.String, satypes.Text, satypes.Unicode) ) or (coltype and coltype is str): - opclass_map[col.name] = "gin_trgm_ops" - - elif getattr(index, "expressions", None): + opclass_map[unqualify(col.name)] = "gin_trgm_ops" + # --- Expressions/TextClause --- + elif getattr(index, "expressions", None) and len(index.expressions) > 0: for expr in index.expressions: - expr_str = str(expr).strip() - elements.append(f"text({expr_str!r})") - - if ( - "postgresql" in index.dialect_options - and index.dialect_options["postgresql"].get("using") == "gin" - ): + if isinstance(expr, TextClause): + expr_str = str(expr) + # GIN/TRGM als Suffix if ( - "::tsvector" not in expr_str - and "array" not in expr_str.lower() - and "json" not in expr_str.lower() + "postgresql" in index.dialect_options + and index.dialect_options["postgresql"].get("using") == "gin" + and not expr_str.rstrip().endswith("gin_trgm_ops") ): - opclass_map[expr_str] = "gin_trgm_ops" - - if not elements: - print( - f"# WARNING: Skipped index {getattr(index, 'name', None)!r} on table {getattr(index.table, 'name', None)!r} (no columns or expressions)." - ) - return "" - - kwargs: dict[str, Any] = {} + expr_str = f"{expr_str} gin_trgm_ops" + args.append(f"text({expr_str!r})") + else: + expr_str = str(expr) + m = re.match(r"^upper\(\((\w+)\)::text\)$", expr_str) + if ( + m + and "postgresql" in index.dialect_options + and index.dialect_options["postgresql"].get("using") == "gin" + ): + args.append(f"text('upper(({m.group(1)})::text) gin_trgm_ops')") + else: + args.append(f"text({expr_str!r})") + else: + # Fallback + pass if index.unique: kwargs["unique"] = True @@ -849,11 +756,10 @@ def clx_render_index(self: "TablesGenerator", index: Index) -> str: kwargs["postgresql_using"] = ( f"'{using}'" if isinstance(using, str) else using ) - if opclass_map: kwargs["postgresql_ops"] = opclass_map - return render_callable("Index", repr(index.name), *elements, kwargs=kwargs) + return render_callable("Index", *args, kwargs=kwargs) TablesGenerator.render_index = clx_render_index # type: ignore[method-assign] @@ -862,9 +768,12 @@ def clx_render_index(self: "TablesGenerator", index: Index) -> str: def clx_render_table(self: "TablesGenerator", table: Table) -> str: args: list[str] = [f"{table.name!r}, {self.base.metadata_ref}"] kwargs: dict[str, object] = {} + + # Columns for column in table.columns: args.append(self.render_column(column, True, is_table=True)) + # Constraints for constraint in sorted(table.constraints, key=get_constraint_sort_key): if uses_default_name(constraint): if isinstance(constraint, PrimaryKeyConstraint): @@ -874,20 +783,25 @@ def clx_render_table(self: "TablesGenerator", table: Table) -> str: continue args.append(self.render_constraint(constraint)) - normal_indices = [idx for idx in table.indexes if not is_special_index(idx)] - for index in sorted(normal_indices, key=lambda i: str(i.name or "")): - # for index in sorted(table.indexes, key=lambda i: str(i.name or "")): - if len(index.columns) > 1 or not uses_default_name(index): - idx_code = self.render_index(index) - if idx_code.strip() and idx_code is not None: - args.append(idx_code) + # Indices + for index in sorted(table.indexes, key=lambda i: str(i.name or "")): + orig_columns = getattr(index, "columns", []) + if orig_columns: + table.indexes.remove(index) + columns = [table.c[unqualify(col.name)] for col in orig_columns] + new_index = Index(index.name, *columns, **index.kwargs) + table.append_constraint(new_index) + idx_code = self.render_index(index) + if idx_code.strip() and idx_code is not None: + args.append(idx_code) if table.schema: - kwargs["schema"] = repr(table.schema) + kwargs["schema"] = table.schema + # Table comment table_comment = getattr(table, "comment", None) if table_comment: - kwargs["comment"] = repr(table.comment) + kwargs["comment"] = table_comment return render_callable("Table", *args, kwargs=kwargs, indentation=" ") @@ -1223,13 +1137,12 @@ def render_table_args(self, table: Table) -> str: ): continue args.append(self.render_constraint(constraint)) - normal_indices = [idx for idx in table.indexes if not is_special_index(idx)] - for index in sorted(normal_indices, key=lambda i: str(i.name or "")): - # for index in sorted(table.indexes, key=lambda i: str(i.name or "")): - if len(index.columns) > 1 or not uses_default_name(index): - idx_code = self.render_index(index) - if idx_code.strip() and idx_code is not None: - args.append(idx_code) + + # NEU: ALLE Indexe (egal ob "special" oder nicht) + for index in sorted(table.indexes, key=lambda i: str(i.name or "")): + idx_code = self.render_index(index) + if idx_code.strip() and idx_code is not None: + args.append(idx_code) if table.schema: kwargs["schema"] = table.schema From dcd5f4a69f3bdf205dca11f92be7618a1a16d68c Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Fri, 11 Jul 2025 09:58:33 +0200 Subject: [PATCH 08/11] fix: get_table_dependency_order --- src/sqlacodegen/seed_export.py | 35 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/sqlacodegen/seed_export.py b/src/sqlacodegen/seed_export.py index 6aa85d93..5892f093 100644 --- a/src/sqlacodegen/seed_export.py +++ b/src/sqlacodegen/seed_export.py @@ -67,6 +67,9 @@ def get_table_dependency_order(metadata: MetaData) -> list[str]: from collections import defaultdict graph: dict[str, set[str]] = defaultdict(set) + for table in metadata.tables.values(): + graph[table.name] + for table in metadata.tables.values(): name = table.name for fk in table.foreign_keys: @@ -74,20 +77,28 @@ def get_table_dependency_order(metadata: MetaData) -> list[str]: if parent != name: graph[name].add(parent) - visited: set[str] = set() - result: list[str] = [] + try: + from graphlib import TopologicalSorter - def visit(node: str) -> None: - if node in visited: - return - visited.add(node) - for dep in graph[node]: - visit(dep) - result.append(node) + ts = TopologicalSorter(graph) + order = list(ts.static_order()) + except ImportError: + visited: set[str] = set() + result: list[str] = [] - for table in metadata.tables.values(): - visit(table.name) - return result[::-1] + def visit(node: str) -> None: + if node in visited: + return + visited.add(node) + for dep in graph[node]: + visit(dep) + result.append(node) + + for node in graph: + visit(node) + order = result[::-1] + + return order def export_pgdata_py( From 59d13de0954fbecf8ab406977c92112950329739 Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Fri, 11 Jul 2025 10:20:17 +0200 Subject: [PATCH 09/11] feat: added FACTORY_REGISTRY --- src/sqlacodegen/generate_factory_fixtures.py | 33 +++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/sqlacodegen/generate_factory_fixtures.py b/src/sqlacodegen/generate_factory_fixtures.py index 9eb5d3d1..b94b29b9 100644 --- a/src/sqlacodegen/generate_factory_fixtures.py +++ b/src/sqlacodegen/generate_factory_fixtures.py @@ -3,6 +3,8 @@ from pathlib import Path from typing import Any +from sqlacodegen.risclog_generators import EXCLUDED_TABLES + FACTORY_HEADER = """\ # AUTO-GENERATED BY sqlacodegen from polyfactory.factories.sqlalchemy_factory import SQLAlchemyFactory @@ -16,22 +18,31 @@ class {class_name}Factory(SQLAlchemyFactory[{class_name}]): __model__ = {class_name}{set_relationships} """ +FACTORY_REGISTRY_HEADER = """ +# Registry for all generated factories +FACTORY_REGISTRY = {{ +{registry_entries} +}} +""" + def camel_to_snake(name: str) -> str: s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() -def render_factory(model: type[Any]) -> str: +def render_factory(model: type[Any]) -> tuple[str, str, str]: has_fk = bool(getattr(model.__table__, "foreign_keys", [])) set_relationships = "\n __set_relationships__ = True" if has_fk else "" class_name = model.__name__ fixture_name = f"{camel_to_snake(class_name)}_factory" - return FACTORY_TEMPLATE.format( + factory_class_name = f"{class_name}Factory" + factory_code = FACTORY_TEMPLATE.format( fixture_name=fixture_name, class_name=class_name, set_relationships=set_relationships, ) + return factory_code, fixture_name, factory_class_name def export_factory_fixtures( @@ -39,16 +50,28 @@ def export_factory_fixtures( factories_path: Path, dependency_order: Sequence[str], ) -> None: - model_names = {models_by_table[table].__name__ for table in dependency_order} + # *** Filter dependency_order anhand von EXCLUDED_TABLES *** + filtered_order = [ + table for table in dependency_order if table not in EXCLUDED_TABLES + ] + + model_names = {models_by_table[table].__name__ for table in filtered_order} import_statement = ( "from risclog.claimxdb.database import (\n " + ",\n ".join(sorted(model_names)) + "\n)" ) factories_lines = [FACTORY_HEADER.format(model_imports=import_statement)] + registry_entries = [] - for table in dependency_order: + for table in filtered_order: model = models_by_table[table] - factories_lines.append(render_factory(model)) + factory_code, fixture_name, factory_class_name = render_factory(model) + factories_lines.append(factory_code) + registry_entries.append(f' "{fixture_name}": {factory_class_name},') + + factories_lines.append( + FACTORY_REGISTRY_HEADER.format(registry_entries="\n".join(registry_entries)) + ) factories_path.write_text("\n".join(factories_lines), encoding="utf-8") From b4b2c545b1920cb514316f2eeb157838c172c239 Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Fri, 11 Jul 2025 11:10:45 +0200 Subject: [PATCH 10/11] feat: exclude views in seeds and factories --- src/sqlacodegen/cli.py | 6 ++++++ src/sqlacodegen/generate_factory_fixtures.py | 5 ++++- src/sqlacodegen/seed_export.py | 9 ++++++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/sqlacodegen/cli.py b/src/sqlacodegen/cli.py index 0479e041..6b856307 100644 --- a/src/sqlacodegen/cli.py +++ b/src/sqlacodegen/cli.py @@ -344,11 +344,16 @@ class ExportDict(TypedDict, total=False): # ----------- PGData SEED Export separat ------------ if args.outfile_dir: + all_view_names = set() + for schema in schemas: + all_view_names |= set(inspector.get_view_names(schema=schema)) + dest_pg_path = Path(str(parent), "pg_seeds.py") export_pgdata_py( engine=engine, metadata=metadata_tables, out_path=dest_pg_path, + view_table_names=all_view_names, ) print(f"PGData Seed geschrieben nach: {dest_pg_path.as_posix()}") @@ -389,5 +394,6 @@ def make_dynamic_models(metadata: MetaData) -> dict[str, type[Any]]: models_by_table=models_by_table, factories_path=Path(parent) / "factories.py", dependency_order=dependency_order, + view_table_names=all_view_names, ) print(f"Factories & Fixtures geschrieben nach: {parent.as_posix()}") diff --git a/src/sqlacodegen/generate_factory_fixtures.py b/src/sqlacodegen/generate_factory_fixtures.py index b94b29b9..3adbca28 100644 --- a/src/sqlacodegen/generate_factory_fixtures.py +++ b/src/sqlacodegen/generate_factory_fixtures.py @@ -49,10 +49,13 @@ def export_factory_fixtures( models_by_table: dict[str, type[Any]], factories_path: Path, dependency_order: Sequence[str], + view_table_names: set[str], ) -> None: # *** Filter dependency_order anhand von EXCLUDED_TABLES *** filtered_order = [ - table for table in dependency_order if table not in EXCLUDED_TABLES + table + for table in dependency_order + if table not in EXCLUDED_TABLES and table not in view_table_names ] model_names = {models_by_table[table].__name__ for table in filtered_order} diff --git a/src/sqlacodegen/seed_export.py b/src/sqlacodegen/seed_export.py index 5892f093..61e15f40 100644 --- a/src/sqlacodegen/seed_export.py +++ b/src/sqlacodegen/seed_export.py @@ -102,8 +102,13 @@ def visit(node: str) -> None: def export_pgdata_py( - engine: Engine, metadata: MetaData, out_path: Path, max_rows: int | None = None + engine: Engine, + metadata: MetaData, + out_path: Path, + max_rows: int | None = None, + view_table_names: set[str] | None = None, ) -> None: + view_table_names = view_table_names or set() order = get_table_dependency_order(metadata) data: dict[str, list[dict[str, Any]]] = {} @@ -111,6 +116,8 @@ def export_pgdata_py( for name in order: if name not in metadata.tables: continue + if name in view_table_names: + continue table = metadata.tables[name] stmt = select(table) if max_rows is not None: From 067ce1731d5dd992c68d01e1ccf897285938a05e Mon Sep 17 00:00:00 2001 From: Marcus Steinbach Date: Mon, 14 Jul 2025 04:38:20 +0200 Subject: [PATCH 11/11] feat: remove factory generator --- src/sqlacodegen/cli.py | 20 +---- src/sqlacodegen/generate_factory_fixtures.py | 80 -------------------- src/sqlacodegen/generators.py | 15 ++++ src/sqlacodegen/risclog_generators.py | 1 + 4 files changed, 17 insertions(+), 99 deletions(-) delete mode 100644 src/sqlacodegen/generate_factory_fixtures.py diff --git a/src/sqlacodegen/cli.py b/src/sqlacodegen/cli.py index 6b856307..ce18d7ae 100644 --- a/src/sqlacodegen/cli.py +++ b/src/sqlacodegen/cli.py @@ -25,7 +25,6 @@ except ImportError: pgvector = None -from sqlacodegen.generate_factory_fixtures import export_factory_fixtures from sqlacodegen.risclog_generators import ( parse_aggregate_row, parse_extension_row, @@ -34,7 +33,7 @@ parse_publication_row, parse_trigger_row, ) -from sqlacodegen.seed_export import export_pgdata_py, get_table_dependency_order +from sqlacodegen.seed_export import export_pgdata_py if sys.version_info < (3, 10): from importlib_metadata import entry_points, version @@ -380,20 +379,3 @@ def make_dynamic_models(metadata: MetaData) -> dict[str, type[Any]]: model = type(class_name, (Base,), {"__table__": table}) models_by_table[table.name] = model return models_by_table - - Base = getattr(generator, "base", None) - if Base is not None: - models = get_all_models(Base) - models_by_table = {m.__tablename__: m for m in models} - else: - models_by_table = make_dynamic_models(metadata_tables) - - dependency_order = get_table_dependency_order(metadata_tables) - - export_factory_fixtures( - models_by_table=models_by_table, - factories_path=Path(parent) / "factories.py", - dependency_order=dependency_order, - view_table_names=all_view_names, - ) - print(f"Factories & Fixtures geschrieben nach: {parent.as_posix()}") diff --git a/src/sqlacodegen/generate_factory_fixtures.py b/src/sqlacodegen/generate_factory_fixtures.py deleted file mode 100644 index 3adbca28..00000000 --- a/src/sqlacodegen/generate_factory_fixtures.py +++ /dev/null @@ -1,80 +0,0 @@ -import re -from collections.abc import Sequence -from pathlib import Path -from typing import Any - -from sqlacodegen.risclog_generators import EXCLUDED_TABLES - -FACTORY_HEADER = """\ -# AUTO-GENERATED BY sqlacodegen -from polyfactory.factories.sqlalchemy_factory import SQLAlchemyFactory -from polyfactory.pytest_plugin import register_fixture -{model_imports} -""" - -FACTORY_TEMPLATE = """\ -@register_fixture(name="{fixture_name}") -class {class_name}Factory(SQLAlchemyFactory[{class_name}]): - __model__ = {class_name}{set_relationships} -""" - -FACTORY_REGISTRY_HEADER = """ -# Registry for all generated factories -FACTORY_REGISTRY = {{ -{registry_entries} -}} -""" - - -def camel_to_snake(name: str) -> str: - s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) - return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() - - -def render_factory(model: type[Any]) -> tuple[str, str, str]: - has_fk = bool(getattr(model.__table__, "foreign_keys", [])) - set_relationships = "\n __set_relationships__ = True" if has_fk else "" - class_name = model.__name__ - fixture_name = f"{camel_to_snake(class_name)}_factory" - factory_class_name = f"{class_name}Factory" - factory_code = FACTORY_TEMPLATE.format( - fixture_name=fixture_name, - class_name=class_name, - set_relationships=set_relationships, - ) - return factory_code, fixture_name, factory_class_name - - -def export_factory_fixtures( - models_by_table: dict[str, type[Any]], - factories_path: Path, - dependency_order: Sequence[str], - view_table_names: set[str], -) -> None: - # *** Filter dependency_order anhand von EXCLUDED_TABLES *** - filtered_order = [ - table - for table in dependency_order - if table not in EXCLUDED_TABLES and table not in view_table_names - ] - - model_names = {models_by_table[table].__name__ for table in filtered_order} - import_statement = ( - "from risclog.claimxdb.database import (\n " - + ",\n ".join(sorted(model_names)) - + "\n)" - ) - factories_lines = [FACTORY_HEADER.format(model_imports=import_statement)] - registry_entries = [] - - for table in filtered_order: - model = models_by_table[table] - factory_code, fixture_name, factory_class_name = render_factory(model) - factories_lines.append(factory_code) - registry_entries.append(f' "{fixture_name}": {factory_class_name},') - - factories_lines.append( - FACTORY_REGISTRY_HEADER.format(registry_entries="\n".join(registry_entries)) - ) - - factories_path.write_text("\n".join(factories_lines), encoding="utf-8") diff --git a/src/sqlacodegen/generators.py b/src/sqlacodegen/generators.py index 7d9bc542..030ba93f 100644 --- a/src/sqlacodegen/generators.py +++ b/src/sqlacodegen/generators.py @@ -1225,6 +1225,21 @@ def render_column_attribute(self, column_attr: ColumnAttribute) -> str: column = column_attr.column rendered_column = self.render_column(column, column_attr.name != column.name) + is_uuid_pk = ( + column.primary_key + and getattr(column.type, "python_type", None) + in (str, bytes) # meistens str + and "Uuid" in str(column.type) + and not getattr(column, "default", None) + and not getattr(column, "server_default", None) + ) + if is_uuid_pk and "default=uuid4" not in rendered_column: + # default=uuid4 in den Column-Aufruf einfügen (vor letztem ")") + if rendered_column.endswith(")"): + # Einfachstes Pattern: vor ")" einfügen + rendered_column = rendered_column[:-1] + ", default=uuid4)" + self.add_literal_import("uuid", "uuid4") + def get_type_qualifiers() -> tuple[str, TypeEngine[Any], str]: column_type = column.type pre: list[str] = [] diff --git a/src/sqlacodegen/risclog_generators.py b/src/sqlacodegen/risclog_generators.py index 70e24270..cd8e0637 100644 --- a/src/sqlacodegen/risclog_generators.py +++ b/src/sqlacodegen/risclog_generators.py @@ -1081,6 +1081,7 @@ def render_view_classes( has_id = False for col in table.columns: + sa_type = sa_type_from_column(col) if col.name == "id": has_id = True