From d6881aee96383bede455447fc167b9f70712f4e9 Mon Sep 17 00:00:00 2001 From: Alex TYRODE Date: Thu, 8 May 2025 17:42:31 +0000 Subject: [PATCH 1/2] chore: remove Alembic migration files and related configurations - Deleted Alembic migration files and configuration to streamline the project and eliminate unused components. - Updated documentation to reflect the removal of distributed operations coordination from Redis. - Removed references to Alembic in the requirements and project structure, simplifying the database management approach. --- docs/context.md | 1 - src/backend/database/alembic.ini | 123 --------- src/backend/database/migrations/env.py | 106 -------- .../database/migrations/script.py.mako | 28 --- .../2025_05_02_2055-migrate_canvas_data.py | 236 ------------------ .../versions/2025_05_04_2310-create_schema.py | 46 ---- src/backend/requirements.txt | 1 - 7 files changed, 541 deletions(-) delete mode 100644 src/backend/database/alembic.ini delete mode 100644 src/backend/database/migrations/env.py delete mode 100644 src/backend/database/migrations/script.py.mako delete mode 100644 src/backend/database/migrations/versions/2025_05_02_2055-migrate_canvas_data.py delete mode 100644 src/backend/database/migrations/versions/2025_05_04_2310-create_schema.py diff --git a/docs/context.md b/docs/context.md index e88fda7..b5eb613 100644 --- a/docs/context.md +++ b/docs/context.md @@ -64,7 +64,6 @@ The system follows a microservices architecture with the following components: 3. **Redis** - Manages user sessions - Provides caching for performance - - Coordinates distributed operations (like migrations) 4. **Keycloak** - Provides OIDC authentication diff --git a/src/backend/database/alembic.ini b/src/backend/database/alembic.ini deleted file mode 100644 index f36d6dd..0000000 --- a/src/backend/database/alembic.ini +++ /dev/null @@ -1,123 +0,0 @@ -# A generic, single database configuration. - -[alembic] -# path to migration scripts -# Use forward slashes (/) also on windows to provide an os agnostic path -script_location = migrations - -# Use a more descriptive file template that includes date and time -file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s - -# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s -# Uncomment the line below if you want the files to be prepended with date and time -# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file -# for all available tokens -# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s - -# sys.path path, will be prepended to sys.path if present. -# defaults to the current working directory. -prepend_sys_path = . - -# timezone to use when rendering the date within the migration file -# as well as the filename. -# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library. 
-# Any required deps can installed by adding `alembic[tz]` to the pip requirements -# string value is passed to ZoneInfo() -# leave blank for localtime -# timezone = - -# max length of characters to apply to the "slug" field -# truncate_slug_length = 40 - -# set to 'true' to run the environment during -# the 'revision' command, regardless of autogenerate -# revision_environment = false - -# set to 'true' to allow .pyc and .pyo files without -# a source .py file to be detected as revisions in the -# versions/ directory -# sourceless = false - -# version location specification; This defaults -# to migrations/versions. When using multiple version -# directories, initial revisions must be specified with --version-path. -# The path separator used here should be the separator specified by "version_path_separator" below. -# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions - -# version path separator; As mentioned above, this is the character used to split -# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. -# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. -# Valid values for version_path_separator are: -# -# version_path_separator = : -# version_path_separator = ; -# version_path_separator = space -# version_path_separator = newline -# -# Use os.pathsep. Default configuration used for new projects. -version_path_separator = os - -# set to 'true' to search source files recursively -# in each "version_locations" directory -# new in Alembic version 1.10 -# recursive_version_locations = false - -# the output encoding used when revision files -# are written from script.py.mako -# output_encoding = utf-8 - -# The SQLAlchemy connection URL is set in env.py from environment variables -sqlalchemy.url = postgresql://postgres:postgres@localhost/pad - - -[post_write_hooks] -# post_write_hooks defines scripts or Python functions that are run -# on newly generated revision scripts. 
See the documentation for further -# detail and examples - -# format using "black" - use the console_scripts runner, against the "black" entrypoint -# hooks = black -# black.type = console_scripts -# black.entrypoint = black -# black.options = -l 79 REVISION_SCRIPT_FILENAME - -# lint with attempts to fix using "ruff" - use the exec runner, execute a binary -# hooks = ruff -# ruff.type = exec -# ruff.executable = %(here)s/.venv/bin/ruff -# ruff.options = check --fix REVISION_SCRIPT_FILENAME - -# Logging configuration -[loggers] -keys = root,sqlalchemy,alembic - -[handlers] -keys = console - -[formatters] -keys = generic - -[logger_root] -level = WARNING -handlers = console -qualname = - -[logger_sqlalchemy] -level = WARNING -handlers = -qualname = sqlalchemy.engine - -[logger_alembic] -level = INFO -handlers = -qualname = alembic - -[handler_console] -class = StreamHandler -args = (sys.stderr,) -level = NOTSET -formatter = generic - -[formatter_generic] -format = %(levelname)-5.5s [%(name)s] %(message)s -datefmt = %H:%M:%S diff --git a/src/backend/database/migrations/env.py b/src/backend/database/migrations/env.py deleted file mode 100644 index d753507..0000000 --- a/src/backend/database/migrations/env.py +++ /dev/null @@ -1,106 +0,0 @@ -from logging.config import fileConfig -import os -import sys -from pathlib import Path - -from sqlalchemy import engine_from_config -from sqlalchemy import pool -from sqlalchemy.engine import URL - -from alembic import context -from dotenv import load_dotenv - -# Add the parent directory to sys.path -sys.path.append(str(Path(__file__).parent.parent.parent.parent)) - -# Load environment variables from .env file -load_dotenv() - -# this is the Alembic Config object, which provides -# access to the values within the .ini file in use. -config = context.config - -# Interpret the config file for Python logging. -# This line sets up loggers basically. -if config.config_file_name is not None: - fileConfig(config.config_file_name) - -# Import the Base metadata from the models -# We need to handle imports differently to avoid module not found errors -import importlib.util -import os - -# Get the absolute path to the models module -models_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "models", "__init__.py") - -# Load the module dynamically -spec = importlib.util.spec_from_file_location("models", models_path) -models = importlib.util.module_from_spec(spec) -spec.loader.exec_module(models) - -# Get Base and SCHEMA_NAME from the loaded module -Base = models.Base -SCHEMA_NAME = models.SCHEMA_NAME -target_metadata = Base.metadata - -# other values from the config, defined by the needs of env.py, -# can be acquired: -# my_important_option = config.get_main_option("my_important_option") -# ... etc. 
- - -# Get database connection details from environment variables -DB_USER = os.getenv('POSTGRES_USER', 'postgres') -DB_PASSWORD = os.getenv('POSTGRES_PASSWORD', 'postgres') -DB_NAME = os.getenv('POSTGRES_DB', 'pad') -DB_HOST = os.getenv('POSTGRES_HOST', 'localhost') -DB_PORT = os.getenv('POSTGRES_PORT', '5432') - -# Override sqlalchemy.url in alembic.ini -config.set_main_option('sqlalchemy.url', f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}") - -def run_migrations_offline() -> None: - """Run migrations in 'offline' mode.""" - url = config.get_main_option("sqlalchemy.url") - context.configure( - url=url, - target_metadata=target_metadata, - literal_binds=True, - dialect_opts={"paramstyle": "named"}, - include_schemas=True, - version_table_schema=SCHEMA_NAME, - ) - - with context.begin_transaction(): - context.run_migrations() - - -def run_migrations_online() -> None: - """Run migrations in 'online' mode. - - In this scenario we need to create an Engine - and associate a connection with the context. - - """ - connectable = engine_from_config( - config.get_section(config.config_ini_section, {}), - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) - - with connectable.connect() as connection: - context.configure( - connection=connection, - target_metadata=target_metadata, - include_schemas=True, - version_table_schema=SCHEMA_NAME - ) - - with context.begin_transaction(): - context.run_migrations() - - -if context.is_offline_mode(): - run_migrations_offline() -else: - run_migrations_online() diff --git a/src/backend/database/migrations/script.py.mako b/src/backend/database/migrations/script.py.mako deleted file mode 100644 index 480b130..0000000 --- a/src/backend/database/migrations/script.py.mako +++ /dev/null @@ -1,28 +0,0 @@ -"""${message} - -Revision ID: ${up_revision} -Revises: ${down_revision | comma,n} -Create Date: ${create_date} - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -${imports if imports else ""} - -# revision identifiers, used by Alembic. -revision: str = ${repr(up_revision)} -down_revision: Union[str, None] = ${repr(down_revision)} -branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} -depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} - - -def upgrade() -> None: - """Upgrade schema.""" - ${upgrades if upgrades else "pass"} - - -def downgrade() -> None: - """Downgrade schema.""" - ${downgrades if downgrades else "pass"} diff --git a/src/backend/database/migrations/versions/2025_05_02_2055-migrate_canvas_data.py b/src/backend/database/migrations/versions/2025_05_02_2055-migrate_canvas_data.py deleted file mode 100644 index 18098e7..0000000 --- a/src/backend/database/migrations/versions/2025_05_02_2055-migrate_canvas_data.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Migrate canvas_data and canvas_backups to new schema - -Revision ID: migrate_canvas_data -Revises: -Create Date: 2025-05-02 20:55:00.000000 - -""" -import logging -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import UUID, JSONB -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.future import select -from sqlalchemy.orm import Session -import uuid -from datetime import datetime - -# revision identifiers, used by Alembic. 
-revision = 'migrate_canvas_data' -down_revision = 'create_schema' # This migration depends on the schema creation -branch_labels = None -depends_on = None - -# Import the schema name from the models using dynamic import -import importlib.util -import os - -# Get the absolute path to the base_model module -base_model_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "models", "base_model.py") - -# Load the module dynamically -spec = importlib.util.spec_from_file_location("base_model", base_model_path) -base_model = importlib.util.module_from_spec(spec) -spec.loader.exec_module(base_model) - -# Get SCHEMA_NAME from the loaded module -SCHEMA_NAME = base_model.SCHEMA_NAME - -def table_exists(connection, table_name, schema='public'): - """Check if a table exists in the database""" - query = sa.text( - """ - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_schema = :schema - AND table_name = :table_name - ) - """ - ) - result = connection.execute(query, {"schema": schema, "table_name": table_name}).scalar() - return bool(result) - -def upgrade() -> None: - """Migrate data from old tables to new schema""" - - # Create a connection to execute raw SQL - connection = op.get_bind() - - # Define tables for direct SQL operations - metadata = sa.MetaData() - - # Check if the source tables exist - canvas_data_exists = table_exists(connection, 'canvas_data') - canvas_backups_exists = table_exists(connection, 'canvas_backups') - - if not canvas_data_exists and not canvas_backups_exists: - logging.info("Source tables 'canvas_data' and 'canvas_backups' do not exist. Skipping data migration.") - return - - # Define the old tables in the public schema - canvas_data = sa.Table( - 'canvas_data', - metadata, - sa.Column('user_id', UUID(as_uuid=True), primary_key=True), - sa.Column('data', JSONB), - schema='public' - ) - - canvas_backups = sa.Table( - 'canvas_backups', - metadata, - sa.Column('id', sa.Integer, primary_key=True), - sa.Column('user_id', UUID(as_uuid=True)), - sa.Column('canvas_data', JSONB), - sa.Column('timestamp', sa.DateTime), - schema='public' - ) - - # Define the new tables in the pad_ws schema with all required columns - users = sa.Table( - 'users', - metadata, - sa.Column('id', UUID(as_uuid=True), primary_key=True), - sa.Column('username', sa.String(254)), - sa.Column('email', sa.String(254)), - sa.Column('email_verified', sa.Boolean), - sa.Column('name', sa.String(254)), - sa.Column('given_name', sa.String(254)), - sa.Column('family_name', sa.String(254)), - sa.Column('roles', JSONB), - schema=SCHEMA_NAME - ) - - pads = sa.Table( - 'pads', - metadata, - sa.Column('id', UUID(as_uuid=True), primary_key=True), - sa.Column('owner_id', UUID(as_uuid=True)), - sa.Column('display_name', sa.String(100)), - sa.Column('data', JSONB), - schema=SCHEMA_NAME - ) - - backups = sa.Table( - 'backups', - metadata, - sa.Column('id', UUID(as_uuid=True), primary_key=True), - sa.Column('source_id', UUID(as_uuid=True)), - sa.Column('data', JSONB), - sa.Column('created_at', sa.DateTime), - schema=SCHEMA_NAME - ) - - # Create a session for ORM operations - session = Session(connection) - - try: - # Dictionary to store user_id -> pad_id mapping for later use with backups - user_pad_mapping = {} - - # Step 1: Process canvas_data if it exists - if canvas_data_exists: - try: - # Get all canvas_data records - canvas_data_records = session.execute(sa.select(canvas_data)).fetchall() - logging.info(f"Found {len(canvas_data_records)} records in canvas_data 
table") - - # Step 2: For each canvas_data record, create a new pad - for record in canvas_data_records: - user_id = record.user_id - - # Check if the user exists in the new schema - user_exists = session.execute( - sa.select(users).where(users.c.id == user_id) - ).fetchone() - - if not user_exists: - logging.info(f"User {user_id} not found in new schema, creating with placeholder data") - # Create a new user with placeholder data - # The real data will be updated when the user accesses the /me route - session.execute( - users.insert().values( - id=user_id, - username=f"migrated_user_{user_id}", - email=f"migrated_{user_id}@example.com", - email_verified=False, - name="Migrated User", - given_name="Migrated", - family_name="User", - roles=[], - ) - ) - - # Generate a new UUID for the pad - pad_id = uuid.uuid4() - - # Store the mapping for later use - user_pad_mapping[user_id] = pad_id - - # Insert the pad record - session.execute( - pads.insert().values( - id=pad_id, - owner_id=user_id, - display_name="Untitled", - data=record.data, - ) - ) - except Exception as e: - logging.error(f"Error processing canvas_data: {e}") - session.rollback() - raise - - # Step 3: Process canvas_backups if it exists - if canvas_backups_exists and user_pad_mapping: # Only process backups if we have pads - try: - # Get all canvas_backups records - canvas_backup_records = session.execute(sa.select(canvas_backups)).fetchall() - logging.info(f"Found {len(canvas_backup_records)} records in canvas_backups table") - - # Step 4: For each canvas_backup record, create a new backup - for record in canvas_backup_records: - user_id = record.user_id - - # Skip if we don't have a pad for this user - if user_id not in user_pad_mapping: - logging.warning(f"No pad found for user {user_id}, skipping backup") - continue - - pad_id = user_pad_mapping[user_id] - - # Insert the backup record - session.execute( - backups.insert().values( - id=uuid.uuid4(), - source_id=pad_id, - data=record.canvas_data, # Note: using canvas_data field from the record - created_at=record.timestamp, - ) - ) - except Exception as e: - logging.error(f"Error processing canvas_backups: {e}") - session.rollback() - raise - - # Commit the transaction - session.commit() - - if canvas_data_exists or canvas_backups_exists: - pad_count = len(user_pad_mapping) if canvas_data_exists else 0 - backup_count = len(canvas_backup_records) if canvas_backups_exists and 'canvas_backup_records' in locals() else 0 - logging.info(f"Migration complete: {pad_count} pads and {backup_count} backups migrated") - else: - logging.info("No data to migrate") - - except Exception as e: - session.rollback() - logging.error(f"Error during migration: {e}") - raise - finally: - session.close() - - -def downgrade() -> None: - """Downgrade is not supported for this migration""" - print("Downgrade is not supported for this data migration") diff --git a/src/backend/database/migrations/versions/2025_05_04_2310-create_schema.py b/src/backend/database/migrations/versions/2025_05_04_2310-create_schema.py deleted file mode 100644 index 70753e5..0000000 --- a/src/backend/database/migrations/versions/2025_05_04_2310-create_schema.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Create schema explicitly - -Revision ID: create_schema -Revises: -Create Date: 2025-05-04 23:10:00.000000 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - -# Import the schema name from the models using dynamic import -import importlib.util -import os - -# Get the absolute path to the 
base_model module -base_model_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "models", "base_model.py") - -# Load the module dynamically -spec = importlib.util.spec_from_file_location("base_model", base_model_path) -base_model = importlib.util.module_from_spec(spec) -spec.loader.exec_module(base_model) - -# Get SCHEMA_NAME from the loaded module -SCHEMA_NAME = base_model.SCHEMA_NAME - -# revision identifiers, used by Alembic. -revision: str = 'create_schema' -down_revision: Union[str, None] = None # This is the first migration -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Create schema explicitly before other operations.""" - # Create schema using execute() with a SQL string instead of CreateSchema - # This approach can be more reliable in certain PostgreSQL versions - op.execute(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}") - - -def downgrade() -> None: - """Drop schema if needed.""" - # We don't actually want to drop the schema on downgrade - # as it would delete all data, but the function is required - pass diff --git a/src/backend/requirements.txt b/src/backend/requirements.txt index 302d8ff..d82093d 100644 --- a/src/backend/requirements.txt +++ b/src/backend/requirements.txt @@ -12,4 +12,3 @@ redis psycopg2-binary python-multipart cryptography # Required for JWT key handling -alembic \ No newline at end of file From a184a135c709594186346d854384be987ed0fcb5 Mon Sep 17 00:00:00 2001 From: Alex TYRODE Date: Thu, 8 May 2025 17:57:57 +0000 Subject: [PATCH 2/2] chore: change default API_WORKERS to 1 --- .env.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.template b/.env.template index 084de2b..720d989 100644 --- a/.env.template +++ b/.env.template @@ -5,7 +5,7 @@ CODER_PORT=7080 APP_PORT=8000 # API Configuration -API_WORKERS=4 +API_WORKERS=1 FRONTEND_URL=your_frontend_url # Database Configuration
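
---

Not part of either patch: a minimal sketch, under stated assumptions, of what the "simplified database management approach" mentioned in PATCH 1/2 could look like once Alembic is removed — creating the schema and tables directly from the SQLAlchemy metadata at startup. The `init_db` name and the `database.models` import path are hypothetical; only the environment variables, `SCHEMA_NAME`, and the `CREATE SCHEMA IF NOT EXISTS` statement are taken from the deleted `env.py` and `create_schema` migration.

```python
# Illustrative sketch only -- not part of either patch. Assumes the project now
# builds its schema straight from the SQLAlchemy models (Base, SCHEMA_NAME) that
# the deleted env.py imported; the init_db name and import path are hypothetical.
import os

from sqlalchemy import create_engine, text

from database.models import Base, SCHEMA_NAME  # hypothetical import path


def _database_url() -> str:
    """Build the connection URL from the same env vars the deleted env.py read."""
    user = os.getenv("POSTGRES_USER", "postgres")
    password = os.getenv("POSTGRES_PASSWORD", "postgres")
    host = os.getenv("POSTGRES_HOST", "localhost")
    port = os.getenv("POSTGRES_PORT", "5432")
    name = os.getenv("POSTGRES_DB", "pad")
    return f"postgresql://{user}:{password}@{host}:{port}/{name}"


def init_db() -> None:
    """Create the schema and any missing tables at startup, replacing Alembic."""
    engine = create_engine(_database_url())
    with engine.begin() as conn:
        # Same statement the deleted create_schema migration executed.
        conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}"))
    # create_all only adds tables that do not exist; existing data is untouched.
    Base.metadata.create_all(engine)
```

If the API now runs with a single worker by default (PATCH 2/2), this startup path executes in one process, which would also remove the need for the Redis-coordinated migration locking that the docs/context.md bullet used to describe — presumably why that line was dropped alongside Alembic.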