From 57945e29bb43dc8fbef75c366e6301a017f6c387 Mon Sep 17 00:00:00 2001
From: dc
Date: Sat, 21 Jun 2025 17:15:31 +0530
Subject: [PATCH 01/38] check for request in kwargs as well

---
 api/activities/decorators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/activities/decorators.py b/api/activities/decorators.py
index 2d644417..ab7cbf7e 100644
--- a/api/activities/decorators.py
+++ b/api/activities/decorators.py
@@ -39,7 +39,7 @@ def decorator(func: F) -> F:
         def wrapper(*args: Any, **kwargs: Any) -> Any:
             # Extract request from args (typically the first or second argument in view functions)
             request = None
-            for arg in args:
+            for arg in list(args) + list(kwargs.values()):
                 if isinstance(arg, HttpRequest):
                     request = arg
                     break

From a3f8610c17c69b59b40dc62f1af5b82b1c054a9c Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:28:59 +0530
Subject: [PATCH 02/38] add all org query

---
 api/schema/organization_schema.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/api/schema/organization_schema.py b/api/schema/organization_schema.py
index 06d1c86f..9507c435 100644
--- a/api/schema/organization_schema.py
+++ b/api/schema/organization_schema.py
@@ -85,6 +85,15 @@ def organizations(

         return [TypeOrganization.from_django(org) for org in queryset]

+    @strawberry_django.field(permission_classes=[IsAuthenticated])
+    def all_organizations(self, info: Info) -> List[TypeOrganization]:
+        """Get all organizations."""
+        user = info.context.user
+        if not user or getattr(user, "is_anonymous", True):
+            logging.warning("Anonymous user or no user found in context")
+            return []
+        return [TypeOrganization.from_django(org) for org in Organization.objects.all()]
+
     @strawberry_django.field
     def organization(self, info: Info, id: str) -> Optional[TypeOrganization]:
         """Get organization by ID."""

From efd622fe03b9faa5fe3fe76b40593216268c90c6 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:29:34 +0530
Subject: [PATCH 03/38] add example env file

---
 .env.example | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..78d362b9
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,14 @@
+DB_ENGINE=django.db.backends.postgresql
+DB_NAME=postgres
+DB_USER=postgres
+DB_PASSWORD=postgres
+DB_HOST=backend_db
+DB_PORT=5432
+TELEMETRY_URL=http://otel-collector:4317
+ELASTICSEARCH_INDEX=http://elasticsearch:9200
+ELASTICSEARCH_USERNAME=elastic
+ELASTICSEARCH_PASS=changeme
+URL_WHITELIST=http://localhost:8000,http://localhost,http://localhost:3000
+DEBUG=True
+SECRET_KEY=your-secret-key
+REDIS_URL=redis://redis:6379/1

From f996c1be70c0acb172adbb59b3d9ccfe671ae3c5 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:30:00 +0530
Subject: [PATCH 04/38] use distinct when fetching org and usecase
 relationships

---
 api/types/type_organization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/types/type_organization.py b/api/types/type_organization.py
index be51d46b..3c83caf8 100644
--- a/api/types/type_organization.py
+++ b/api/types/type_organization.py
@@ -59,7 +59,7 @@ def published_use_cases_count(self, info: Info) -> int:
             use_cases = UseCase.objects.filter(
                 usecaseorganizationrelationship__organization_id=org_id,  # type: ignore
                 status=UseCaseStatus.PUBLISHED.value,
-            )
+            ).distinct()
             return use_cases.count()
         except Exception:
             return 0

From 2b63e873907f94fcd0d4e4ae11eff47222874b1f Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:53:03 +0530
Subject: [PATCH 05/38] update logging limits

---
 docker-compose.yml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 3fc51910..607d99b4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -22,6 +22,10 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   backend_db:
     image: "postgres:14.4"
@@ -42,6 +46,10 @@ services:
       timeout: 5s
       retries: 5
       start_period: 10s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2
@@ -74,6 +82,10 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   redis:
     image: "redis:alpine"
@@ -90,6 +102,10 @@ services:
       timeout: 5s
       retries: 5
       start_period: 10s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   telemetry_elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
@@ -116,6 +132,10 @@ services:
       test: curl -s http://localhost:9200/_cluster/health | grep -vq '"status":"red"'

+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   kibana:
     image: docker.elastic.co/kibana/kibana:7.16.2
@@ -171,6 +191,10 @@ services:
       test: curl --write-out 'HTTP %{http_code}' --fail --silent --output /dev/null http://localhost:8200/

+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   otel-collector:
     image: otel/opentelemetry-collector:latest
@@ -184,6 +208,10 @@ services:
         condition: service_healthy
     ports:
       - 4317:4317
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

 volumes:
   backend_db_data:

From f2a8acb1b644daf04e0284f91287fe4f4611bf6b Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:56:03 +0530
Subject: [PATCH 06/38] allow editing only draft datasets

---
 api/schema/dataset_schema.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index 372511ad..7a9f2ab8 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -564,6 +564,10 @@ def add_update_dataset_metadata(
             dataset = Dataset.objects.get(id=dataset_id)
         except Dataset.DoesNotExist as e:
             raise DjangoValidationError(f"Dataset with ID {dataset_id} does not exist.")
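+        # Metadata edits are rejected once a dataset leaves draft status.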
+        if dataset.status != DatasetStatus.DRAFT.value:
+            raise DjangoValidationError(
+                f"Dataset with ID {dataset_id} is not in draft status."
+            )

         if update_metadata_input.description:
             dataset.description = update_metadata_input.description

From 3e22e2c26479246588d909c8ce1757024f1f5dfd Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 17:08:07 +0530
Subject: [PATCH 07/38] return those usecases where org is owner as well

---
 api/schema/organization_data_schema.py | 8 +++++++-
 api/types/type_organization.py         | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/api/schema/organization_data_schema.py b/api/schema/organization_data_schema.py
index 7d5744fc..0620b23a 100644
--- a/api/schema/organization_data_schema.py
+++ b/api/schema/organization_data_schema.py
@@ -4,6 +4,7 @@

 import strawberry
 import strawberry_django
+from django.db.models import Q
 from strawberry.types import Info

 from api.models import Dataset, Organization, Sector, UseCase
@@ -58,7 +59,12 @@ def organization_published_use_cases(
         try:
             # Get published use cases for this organization
             queryset = UseCase.objects.filter(
-                usecaseorganizationrelationship__organization_id=organization_id,
+                (
+                    Q(organization__id=organization_id)
+                    | Q(
+                        usecaseorganizationrelationship__organization_id=organization_id
+                    )
+                ),
                 status=UseCaseStatus.PUBLISHED.value,
             ).distinct()
             return TypeUseCase.from_django_list(queryset)
diff --git a/api/types/type_organization.py b/api/types/type_organization.py
index 3c83caf8..d0c3e776 100644
--- a/api/types/type_organization.py
+++ b/api/types/type_organization.py
@@ -2,6 +2,7 @@

 import strawberry
 import strawberry_django
+from django.db.models import Q
 from strawberry import Info, auto

 from api.models import Organization
@@ -57,7 +58,7 @@ def published_use_cases_count(self, info: Info) -> int:
                 return 0

             use_cases = UseCase.objects.filter(
-                usecaseorganizationrelationship__organization_id=org_id,  # type: ignore
+                (Q(organization__id=org_id) | Q(usecaseorganizationrelationship__organization_id=org_id)),  # type: ignore
                 status=UseCaseStatus.PUBLISHED.value,
             ).distinct()
             return use_cases.count()

From 5ff4e6eed2e577ddc9ffb8683a546876a3b9b606 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 18:28:19 +0530
Subject: [PATCH 08/38] update publishers query to handle individual publishers

---
 api/schema/dataset_schema.py | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index 7a9f2ab8..47b9c874 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -18,6 +18,7 @@
     ResourceChartDetails,
     ResourceChartImage,
     Sector,
+    UseCase,
 )
 from api.models.Dataset import Tag
 from api.models.DatasetMetadata import DatasetMetadata
@@ -31,7 +32,12 @@
 from api.types.type_organization import TypeOrganization
 from api.types.type_resource_chart import TypeResourceChart
 from api.types.type_resource_chart_image import TypeResourceChartImage
-from api.utils.enums import DatasetAccessType, DatasetLicense, DatasetStatus
+from api.utils.enums import (
+    DatasetAccessType,
+    DatasetLicense,
+    DatasetStatus,
+    UseCaseStatus,
+)
 from api.utils.graphql_telemetry import trace_resolver
 from authorization.models import DatasetPermission, OrganizationMembership, Role, User
 from authorization.permissions import (
@@ -469,20 +475,30 @@ def get_publishers(self, info: Info) -> List[Union[TypeOrganization, TypeUser]]:
         published_datasets = Dataset.objects.filter(
             status=DatasetStatus.PUBLISHED.value
         )
+        published_ds_organizations = published_datasets.values_list(
+            "organization_id", flat=True
+        )
+        published_usecases = UseCase.objects.filter(
+            status=UseCaseStatus.PUBLISHED.value
+        )
+        published_uc_organizations = published_usecases.values_list(
+            "organization_id", flat=True
+        )
+        published_organizations = set(published_ds_organizations) | set(
+            published_uc_organizations
+        )

         # Get unique organizations that have published datasets
         org_publishers = Organization.objects.filter(
-            id__in=published_datasets.filter(organization__isnull=False).values_list(
-                "organization_id", flat=True
-            )
+            id__in=published_organizations
         ).distinct()

+        published_ds_users = published_datasets.values_list("user_id", flat=True)
+        published_uc_users = published_usecases.values_list("user_id", flat=True)
+        published_users = set(published_ds_users) | set(published_uc_users)
+
         # Get unique individual users who have published datasets without an organization
-        individual_publishers = User.objects.filter(
-            id__in=published_datasets.filter(organization__isnull=True).values_list(
-                "user_id", flat=True
-            )
-        ).distinct()
+        individual_publishers = User.objects.filter(id__in=published_users).distinct()

         # Convert to GraphQL types
         org_types = [TypeOrganization.from_django(org) for org in org_publishers]

From 601a7cd01c3f7137238d86919c02edee363d2fd4 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 18:42:02 +0530
Subject: [PATCH 09/38] add constraints for user role editing

---
 authorization/schema/mutation.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/authorization/schema/mutation.py b/authorization/schema/mutation.py
index 88103700..f6d7c2e9 100644
--- a/authorization/schema/mutation.py
+++ b/authorization/schema/mutation.py
@@ -111,15 +111,18 @@ def add_user_to_organization(
             organization = info.context.context.get("organization")
             role = Role.objects.get(id=input.role_id)

+            # If a user tries to change their own role, raise an error
+            if user.id == info.context.user.id:
+                raise ValueError("You cannot change your own role.")
+
             # Check if the membership already exists
             membership, created = OrganizationMembership.objects.get_or_create(
                 user=user, organization=organization, defaults={"role": role}
             )

-            # If the membership exists but the role is different, update it
-            if not created and membership.role != role:
-                membership.role = role
-                membership.save()
+            # If the membership exists, raise error
+            if not created:
+                raise ValueError("User is already a member of this organization.")

             return TypeOrganizationMembership.from_django(membership)
         except User.DoesNotExist:

From 7af8108bac800c58468df01a123c46ce92b5db37 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 18:50:36 +0530
Subject: [PATCH 10/38] use base mutation for assigning user role

---
 authorization/schema/mutation.py | 40 ++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/authorization/schema/mutation.py b/authorization/schema/mutation.py
index f6d7c2e9..52b0f73c 100644
--- a/authorization/schema/mutation.py
+++ b/authorization/schema/mutation.py
@@ -7,7 +7,12 @@
 import structlog
 from strawberry.types import Info

-from api.models import Dataset, Organization
+from api.models import Dataset
+from api.schema.base_mutation import (
+    BaseMutation,
+    DjangoValidationError,
+    MutationResponse,
+)
 from api.utils.graphql_telemetry import trace_resolver
 from authorization.models import OrganizationMembership, Role, User
 from authorization.permissions import IsAuthenticated
@@ -95,16 +100,23 @@ def update_user(self, info: Info, input: UpdateUserInput) -> TypeUser:

         return TypeUser.from_django(user)

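+    # BaseMutation.mutation (below) bundles permission checks, tracing, and activity tracking.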
-    @strawberry_django.mutation(
+    @strawberry.mutation
+    @BaseMutation.mutation(
         permission_classes=[IsAuthenticated, HasOrganizationAdminRole],
-    )
-    @trace_resolver(
-        name="add_user_to_organization",
-        attributes={"component": "user", "operation": "mutation"},
+        trace_name="add_user_to_organization",
+        trace_attributes={"component": "user", "operation": "mutation"},
+        track_activity={
+            "verb": "added",
+            "get_data": lambda result, **kwargs: {
+                "user_id": str(result.user.id),
+                "organization_id": str(result.organization.id),
+                "role_id": str(result.role.id),
+            },
+        },
     )
     def add_user_to_organization(
         self, info: Info, input: AddRemoveUserToOrganizationInput
-    ) -> TypeOrganizationMembership:
+    ) -> MutationResponse[TypeOrganizationMembership]:
         """Add a user to an organization with a specific role."""
         try:
             user = User.objects.get(id=input.user_id)
             organization = info.context.context.get("organization")
             role = Role.objects.get(id=input.role_id)

             # If a user tries to change their own role, raise an error
             if user.id == info.context.user.id:
-                raise ValueError("You cannot change your own role.")
+                raise DjangoValidationError("You cannot change your own role.")

             # Check if the membership already exists
             membership, created = OrganizationMembership.objects.get_or_create(
                 user=user, organization=organization, defaults={"role": role}
             )

             # If the membership exists, raise error
             if not created:
-                raise ValueError("User is already a member of this organization.")
+                raise DjangoValidationError(
+                    "User is already a member of this organization."
+                )

-            return TypeOrganizationMembership.from_django(membership)
+            return MutationResponse.success_response(
+                TypeOrganizationMembership.from_django(membership)
+            )
         except User.DoesNotExist:
-            raise ValueError(f"User with ID {input.user_id} does not exist.")
+            raise DjangoValidationError(f"User with ID {input.user_id} does not exist.")
         except Role.DoesNotExist:
-            raise ValueError(f"Role with ID {input.role_id} does not exist.")
+            raise DjangoValidationError(f"Role with ID {input.role_id} does not exist.")

     @strawberry.mutation
     def assign_organization_role(

From b341641789a6250b4e8e1325c5c278d65ac6ccac Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 19:09:11 +0530
Subject: [PATCH 11/38] add permission class to delete tag

---
 api/schema/tags_schema.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/api/schema/tags_schema.py b/api/schema/tags_schema.py
index 5401d821..ef45a465 100644
--- a/api/schema/tags_schema.py
+++ b/api/schema/tags_schema.py
@@ -12,7 +12,9 @@ class Mutation:
     """Mutations for tags."""

-    @strawberry_django.mutation(handle_django_errors=False)
+    @strawberry_django.mutation(
+        handle_django_errors=False, permission_classes=[IsAuthenticated]
+    )
     @trace_resolver(
         name="delete_tag", attributes={"component": "tag", "operation": "mutation"}
     )

From fec23c5fa3a50df36a9bcf3229350e7b55d0ea74 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 19:13:49 +0530
Subject: [PATCH 12/38] use DjangoValidationError when deleting tags

---
 api/schema/tags_schema.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/api/schema/tags_schema.py b/api/schema/tags_schema.py
index ef45a465..5c549cbe 100644
--- a/api/schema/tags_schema.py
+++ b/api/schema/tags_schema.py
@@ -3,7 +3,11 @@
 from strawberry.types import Info

 from api.models import Tag
-from api.schema.base_mutation import BaseMutation, MutationResponse
+from api.schema.base_mutation import (
+    BaseMutation,
+    DjangoValidationError,
+    MutationResponse,
+)
 from api.utils.graphql_telemetry import trace_resolver
 from authorization.permissions import IsAuthenticated
@@ -38,6 +42,6 @@ def delete_tags(self, info: Info, tag_ids: list[str]) -> MutationResponse[bool]:
         try:
             tags = Tag.objects.filter(id__in=tag_ids)
         except Tag.DoesNotExist:
-            raise ValueError(f"Tags with IDs {tag_ids} do not exist.")
+            raise DjangoValidationError(f"Tags with IDs {tag_ids} do not exist.")
         tags.delete()
         return MutationResponse.success_response(True)

From 33333bc0c230afecfc68e0982d1ed53c1d67aaf5 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 12:20:14 +0530
Subject: [PATCH 13/38] add basic cloudformation templates

---
 .github/workflows/deploy-to-ecs.yml           | 138 ++++++++
 .pre-commit-config.yaml                       |  12 +
 .../dataspace-infrastructure.yml              | 335 ++++++++++++++++++
 aws/multi-service-architecture.md             | 249 +++++++++++++
 aws/otel-collector-task-definition.json       |  81 +++++
 aws/redis-task-definition.json                |  53 +++
 aws/task-definition.json                      |  61 ++++
 7 files changed, 929 insertions(+)
 create mode 100644 .github/workflows/deploy-to-ecs.yml
 create mode 100644 aws/cloudformation/dataspace-infrastructure.yml
 create mode 100644 aws/multi-service-architecture.md
 create mode 100644 aws/otel-collector-task-definition.json
 create mode 100644 aws/redis-task-definition.json
 create mode 100644 aws/task-definition.json

diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml
new file mode 100644
index 00000000..8becd02b
--- /dev/null
+++ b/.github/workflows/deploy-to-ecs.yml
@@ -0,0 +1,138 @@
+name: Deploy to Amazon ECS
+
+on:
+  push:
+    branches:
+      - dev
+  workflow_dispatch:
+
+env:
+  AWS_REGION: ${{ secrets.AWS_REGION }}
+  ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
+  ECS_CLUSTER: ${{ secrets.ECS_CLUSTER }}
+  ECS_EXECUTION_ROLE_ARN: ${{ secrets.ECS_EXECUTION_ROLE_ARN }}
+  APP_NAME: dataspace
+  APP_PORT: 8000
+  DB_ENGINE: django.db.backends.postgresql
+  DB_PORT: 5432
+  DEBUG_MODE: "False"
+  TELEMETRY_URL: http://otel-collector:4317
+  CPU_UNITS: 256
+  MEMORY_UNITS: 512
+  SSM_PATH_PREFIX: /dataspace
+  ENVIRONMENT: ${{ secrets.ENVIRONMENT || 'dev' }}
+
+jobs:
+  deploy-infrastructure:
+    name: Deploy Infrastructure
+    runs-on: ubuntu-latest
+    environment: development
+    if: github.event_name == 'workflow_dispatch' || contains(github.event.head_commit.modified, 'aws/cloudformation')
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Deploy CloudFormation stack
+        run: |
+          aws cloudformation deploy \
+            --template-file aws/cloudformation/dataspace-infrastructure.yml \
+            --stack-name dataspace-${{ env.ENVIRONMENT }}-infrastructure \
+            --parameter-overrides \
+              Environment=${{ env.ENVIRONMENT }} \
+              VpcId=${{ secrets.VPC_ID }} \
+              SubnetIds=${{ secrets.SUBNET_IDS }} \
+              DBUsername=${{ secrets.DB_USERNAME }} \
+              DBPassword=${{ secrets.DB_PASSWORD }} \
+              DBName=${{ secrets.DB_NAME }} \
+              ElasticsearchPassword=${{ secrets.ELASTICSEARCH_PASSWORD }} \
+              DjangoSecretKey=${{ secrets.DJANGO_SECRET_KEY }} \
+            --capabilities CAPABILITY_IAM \
+            --no-fail-on-empty-changeset
+
+  deploy-app:
+    name: Deploy Application
+    runs-on: ubuntu-latest
+    environment: development
+    needs: deploy-infrastructure
+    if: always() # Run even if infrastructure deployment is skipped
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v1
+
+      - name: Build, tag, and push image to Amazon ECR
+        id: build-image
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          IMAGE_TAG: ${{ github.sha }}
+        run: |
+          docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
+          docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
+          echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT
+
+      - name: Process main task definition template
+        id: task-def-app
+        env:
+          ECR_REPOSITORY_URI: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}
+          IMAGE_TAG: ${{ github.sha }}
+        run: |
+          envsubst < aws/task-definition.json > aws/task-definition-processed.json
+          cat aws/task-definition-processed.json
+
+      - name: Deploy main application ECS task definition
+        uses: aws-actions/amazon-ecs-deploy-task-definition@v1
+        with:
+          task-definition: aws/task-definition-processed.json
+          service: ${{ secrets.ECS_SERVICE }}
+          cluster: ${{ env.ECS_CLUSTER }}
+          wait-for-service-stability: true
+
+  deploy-otel:
+    name: Deploy OpenTelemetry Collector
+    runs-on: ubuntu-latest
+    environment: development
+    needs: deploy-app
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Process OpenTelemetry task definition template
+        id: task-def-otel
+        run: |
+          envsubst < aws/otel-collector-task-definition.json > aws/otel-collector-task-definition-processed.json
+          cat aws/otel-collector-task-definition-processed.json
+
+      - name: Deploy OpenTelemetry ECS task definition
+        uses: aws-actions/amazon-ecs-deploy-task-definition@v1
+        with:
+          task-definition: aws/otel-collector-task-definition-processed.json
+          service: ${{ secrets.ECS_OTEL_SERVICE }}
+          cluster: ${{ env.ECS_CLUSTER }}
+          wait-for-service-stability: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 186db564..25a71ade 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,6 +5,7 @@ repos:
     - id: trailing-whitespace
     - id: end-of-file-fixer
    - id: check-yaml
+      exclude: ^aws/cloudformation/.*\.yml$
    - id: check-added-large-files
    - id: debug-statements

@@ -20,6 +21,17 @@ repos:
    - id: isort
      args: ["--profile", "black"]

+- repo: local
+  hooks:
+  - id: cloudformation-validate
+    name: AWS CloudFormation Validation
+    description: Validates CloudFormation templates using AWS CLI
+    entry: bash -c 'aws cloudformation validate-template --template-body file://$0 || exit 1'
+    language: system
+    files: ^aws/cloudformation/.*\.yml$
+    require_serial: true
+    pass_filenames: true
+
 - repo: https://github.com/pre-commit/mirrors-mypy
   rev: v1.9.0
   hooks:
diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml
new file mode 100644
index 00000000..2bece03d
--- /dev/null
+++ b/aws/cloudformation/dataspace-infrastructure.yml
@@ -0,0 +1,335 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: 'DataSpace Application Infrastructure'
+
+Parameters:
+  Environment:
+    Description: Environment name (dev, staging, prod)
+    Type: String
+    Default: dev
+    AllowedValues:
+      - dev
+      - staging
+      - prod
+
+  VpcId:
+    Description: ID of the VPC
+    Type: AWS::EC2::VPC::Id
+
+  SubnetIds:
+    Description: List of subnet IDs for the application
+    Type: List<AWS::EC2::Subnet::Id>
+
+  DBUsername:
+    Description: Database username
+    Type: String
+    NoEcho: true
+
+  DBPassword:
+    Description: Database password
+    Type: String
+    NoEcho: true
+
+  DBName:
+    Description: Database name
+    Type: String
+    Default: dataspace
+
+  DBInstanceClass:
+    Description: Database instance class
+    Type: String
+    Default: db.t3.small
+
+  ElasticsearchInstanceType:
+    Description: Elasticsearch instance type
+    Type: String
+    Default: t3.small.elasticsearch
+
+  ElasticsearchPassword:
+    Description: Elasticsearch password
+    Type: String
+    NoEcho: true
+    Default: changeme
+
+  RedisNodeType:
+    Description: Redis node type
+    Type: String
+    Default: cache.t3.small
+
+  DjangoSecretKey:
+    Description: Django secret key
+    Type: String
+    NoEcho: true
+
+Resources:
+  # Security Groups
+  DatabaseSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for RDS database
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 5432
+          ToPort: 5432
+          SourceSecurityGroupId: !Ref ECSSecurityGroup
+
+  ECSSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for ECS tasks
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 8000
+          ToPort: 8000
+          CidrIp: 0.0.0.0/0
+
+  ElasticsearchSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for Elasticsearch
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 443
+          ToPort: 443
+          SourceSecurityGroupId: !Ref ECSSecurityGroup
+
+  RedisSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for Redis
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 6379
+          ToPort: 6379
+          SourceSecurityGroupId: !Ref ECSSecurityGroup
+
+  # Database
+  DatabaseSubnetGroup:
+    Type: AWS::RDS::DBSubnetGroup
+    Properties:
+      DBSubnetGroupDescription: Subnet group for DataSpace database
+      SubnetIds: !Ref SubnetIds
+
+  Database:
+    Type: AWS::RDS::DBInstance
+    Properties:
+      AllocatedStorage: 20
+      DBInstanceClass: !Ref DBInstanceClass
+      Engine: postgres
+      EngineVersion: '14.4'
+      MasterUsername: !Ref DBUsername
+      MasterUserPassword: !Ref DBPassword
+      DBName: !Ref DBName
+      VPCSecurityGroups:
+        - !GetAtt DatabaseSecurityGroup.GroupId
+      DBSubnetGroupName: !Ref DatabaseSubnetGroup
+      MultiAZ: false
+      StorageType: gp2
+      Tags:
+        - Key: Name
+          Value: !Sub dataspace-${Environment}-db
+    DeletionPolicy: Snapshot
+
+  # Elasticsearch Domain
+  ElasticsearchDomain:
+    Type: AWS::Elasticsearch::Domain
+    Properties:
+      DomainName: !Sub dataspace-${Environment}
+      ElasticsearchVersion: '7.10'
+      ElasticsearchClusterConfig:
+        InstanceType: !Ref ElasticsearchInstanceType
+        InstanceCount: 1
+      EBSOptions:
+        EBSEnabled: true
+        VolumeType: gp2
+        VolumeSize: 10
+      AccessPolicies:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
+            Action: 'es:*'
+            Resource: !Sub 'arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/dataspace-${Environment}/*'
+      AdvancedSecurityOptions:
+        Enabled: true
+        InternalUserDatabaseEnabled: true
+        MasterUserOptions:
+          MasterUserName: elastic
+          MasterUserPassword: !Ref ElasticsearchPassword
+      EncryptionAtRestOptions:
+        Enabled: true
+      NodeToNodeEncryptionOptions:
+        Enabled: true
+      DomainEndpointOptions:
+        EnforceHTTPS: true
+      VPCOptions:
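+        # Single-node domain: attach the ES security group and deploy into the first subnet only.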
+        SecurityGroupIds:
+          - !GetAtt ElasticsearchSecurityGroup.GroupId
+        SubnetIds:
+          - !Select [0, !Ref SubnetIds]
+    # Note: AWS::Elasticsearch::Domain does not support DeletionPolicy: Snapshot
+
+  # Redis Cache
+  RedisSubnetGroup:
+    Type: AWS::ElastiCache::SubnetGroup
+    Properties:
+      Description: Subnet group for DataSpace Redis
+      SubnetIds: !Ref SubnetIds
+
+  RedisCluster:
+    Type: AWS::ElastiCache::CacheCluster
+    Properties:
+      CacheNodeType: !Ref RedisNodeType
+      Engine: redis
+      NumCacheNodes: 1
+      VpcSecurityGroupIds:
+        - !GetAtt RedisSecurityGroup.GroupId
+      CacheSubnetGroupName: !Ref RedisSubnetGroup
+      Tags:
+        - Key: Name
+          Value: !Sub dataspace-${Environment}-redis
+
+  # ECS Cluster
+  ECSCluster:
+    Type: AWS::ECS::Cluster
+    Properties:
+      ClusterName: !Sub dataspace-${Environment}-cluster
+      CapacityProviders:
+        - FARGATE
+        - FARGATE_SPOT
+      DefaultCapacityProviderStrategy:
+        - CapacityProvider: FARGATE
+          Weight: 1
+      Tags:
+        - Key: Name
+          Value: !Sub dataspace-${Environment}-cluster
+
+  # IAM Roles
+  ECSTaskExecutionRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: ecs-tasks.amazonaws.com
+            Action: sts:AssumeRole
+      ManagedPolicyArns:
+        - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
+      Policies:
+        - PolicyName: SSMParameterAccess
+          PolicyDocument:
+            Version: '2012-10-17'
+            Statement:
+              - Effect: Allow
+                Action:
+                  - ssm:GetParameters
+                  - ssm:GetParameter
+                Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/dataspace/*'
+
+  # SSM Parameters
+  DBHostParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_HOST
+      Type: String
+      Value: !GetAtt Database.Endpoint.Address
+      Description: Database host
+
+  DBNameParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_NAME
+      Type: String
+      Value: !Ref DBName
+      Description: Database name
+
+  DBUserParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_USER
+      Type: String
+      Value: !Ref DBUsername
+      Description: Database username
+
+  DBPasswordParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_PASSWORD
+      Type: SecureString
+      Value: !Ref DBPassword
+      Description: Database password
+
+  ElasticsearchIndexParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/ELASTICSEARCH_INDEX
+      Type: String
+      Value: !GetAtt ElasticsearchDomain.DomainEndpoint
+      Description: Elasticsearch endpoint
+
+  ElasticsearchUsernameParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/ELASTICSEARCH_USERNAME
+      Type: String
+      Value: elastic
+      Description: Elasticsearch username
+
+  ElasticsearchPasswordParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/ELASTICSEARCH_PASS
+      Type: SecureString
+      Value: !Ref ElasticsearchPassword
+      Description: Elasticsearch password
+
+  RedisHostParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/REDIS_HOST
+      Type: String
+      Value: !GetAtt RedisCluster.RedisEndpoint.Address
+      Description: Redis host
+
+  SecretKeyParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/SECRET_KEY
+      Type: SecureString
+      Value: !Ref DjangoSecretKey
+      Description: Django secret key
+
+  URLWhitelistParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/URL_WHITELIST
+      Type: String
+      Value: !Sub 'https://dataspace-${Environment}.yourdomain.com'
+      Description: URL whitelist
+
+Outputs:
+  ClusterName:
+    Description: ECS Cluster Name
+    Value: !Ref ECSCluster
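+  # The remaining outputs surface the managed-service endpoints for operators and tooling.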
+
+  DatabaseEndpoint:
+    Description: Database endpoint
+    Value: !GetAtt Database.Endpoint.Address
+
+  ElasticsearchEndpoint:
+    Description: Elasticsearch endpoint
+    Value: !GetAtt ElasticsearchDomain.DomainEndpoint
+
+  RedisEndpoint:
+    Description: Redis endpoint
+    Value: !GetAtt RedisCluster.RedisEndpoint.Address
+
+  TaskExecutionRoleArn:
+    Description: ECS Task Execution Role ARN
+    Value: !GetAtt ECSTaskExecutionRole.Arn
diff --git a/aws/multi-service-architecture.md b/aws/multi-service-architecture.md
new file mode 100644
index 00000000..6bfc34cb
--- /dev/null
+++ b/aws/multi-service-architecture.md
@@ -0,0 +1,249 @@
+# DataSpace Multi-Service Architecture for AWS ECS
+
+This document outlines the architecture for deploying the DataSpace application and its dependent services to AWS ECS.
+
+## Architecture Overview
+
+The DataSpace application consists of several services that work together:
+
+1. **Backend Application (dataspace)** - The main Django/Python application
+2. **PostgreSQL Database (backend_db)** - Database for the application
+3. **Elasticsearch** - For search functionality
+4. **Redis** - For caching and possibly message queuing
+5. **Telemetry Services** - Including OpenTelemetry Collector
+
+## AWS Services Mapping
+
+For production deployment on AWS, we use the following mapping:
+
+| Local Service | AWS Service | Justification |
+|---------------|-------------|---------------|
+| dataspace (backend) | ECS Fargate | Containerized application, managed by ECS |
+| backend_db | Amazon RDS for PostgreSQL | Managed database service with backups, high availability |
+| elasticsearch | Amazon Elasticsearch Service | Managed Elasticsearch with scaling and security |
+| redis | Amazon ElastiCache for Redis | Managed Redis with high availability |
+| otel-collector | ECS Fargate (separate task) | Deployed as a separate container service |
+
+## Deployment Architecture
+
+### Infrastructure as Code
+
+All AWS resources are provisioned using CloudFormation templates located in `aws/cloudformation/`. The main template `dataspace-infrastructure.yml` creates:
+
+1. **Security Groups** - For RDS, Elasticsearch, Redis, and ECS services
+2. **Amazon RDS PostgreSQL** - Managed database with subnet group
+3. **Amazon Elasticsearch Service** - Managed Elasticsearch domain with security and access policies
+4. **Amazon ElastiCache Redis** - Managed Redis cluster
+5. **ECS Cluster** - With Fargate and Fargate Spot capacity providers
+6. **IAM Roles** - For ECS task execution with appropriate permissions
+7. **SSM Parameters** - For storing sensitive connection information
+
+### ECS Task Definitions
+
+The application is deployed using two main ECS task definitions:
+
+1. **Main Application (`aws/task-definition.json`)** - Deploys the Django application container with:
+   - Environment variables for configuration
+   - Secrets from SSM Parameter Store for sensitive data
+   - Health checks and logging configuration
+   - Network configuration for service discovery
+
+2. **OpenTelemetry Collector (`aws/otel-collector-task-definition.json`)** - Deploys the telemetry collector with:
+   - Port mappings for various telemetry protocols
+   - Volume mounts for configuration
+   - Health checks and logging
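+
+As a rough manual equivalent of what the CI pipeline does (a sketch — the pipeline itself registers the definition through the `amazon-ecs-deploy-task-definition` action, and this assumes the template variables are exported in the shell):
+
+```bash
+# Render the template by substituting ${VAR} placeholders, then register it by hand
+envsubst < aws/task-definition.json > /tmp/task-definition.json
+aws ecs register-task-definition --cli-input-json file:///tmp/task-definition.json
+```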
+
+### Managed Services Integration
+
+#### Amazon RDS PostgreSQL
+
+The PostgreSQL database is provisioned as a managed RDS instance with:
+
+- Automated backups
+- Security group restrictions (only accessible from ECS tasks)
+- Credentials stored in SSM Parameter Store
+- Connection information injected into the application container as environment variables
+
+#### Amazon Elasticsearch Service
+
+Elasticsearch is provisioned as a managed service with:
+
+- Fine-grained access control
+- HTTPS encryption
+- Security group restrictions
+- Connection information stored in SSM Parameter Store
+
+#### Amazon ElastiCache Redis
+
+Redis is provisioned as a managed ElastiCache cluster with:
+
+- Security group restrictions
+- Connection information stored in SSM Parameter Store
+- Host and port injected into the application container
+
+## CI/CD Pipeline
+
+The deployment is automated using GitHub Actions workflow (`.github/workflows/deploy-to-ecs.yml`) that:
+
+1. **Triggers** on pushes to the `dev` branch or manual workflow dispatch
+2. **Deploys Infrastructure** using CloudFormation (conditionally based on changes)
+3. **Builds and Pushes** Docker images to Amazon ECR
+4. **Deploys Application** using ECS task definitions with environment variable substitution
+5. **Deploys OpenTelemetry Collector** as a separate ECS service
+
+### Idempotent Infrastructure Creation
+
+The CloudFormation template is designed to be idempotent by:
+
+1. Using the `--no-fail-on-empty-changeset` flag in CloudFormation deployment
+2. Setting appropriate `DeletionPolicy` and `UpdateReplacePolicy` attributes on resources
+3. Using conditional resource creation based on environment parameters
+
+This ensures that:
+- If resources already exist, they won't be recreated unnecessarily
+- Database and Elasticsearch data is preserved during updates
+- Application code can be updated independently of infrastructure
+
+## Environment Variables and Secrets Management
+
+The deployment uses a three-tier approach to configuration:
+
+1. **GitHub Repository Secrets** - For AWS credentials and sensitive parameters
+2. **Environment Variables** - For non-sensitive configuration in CI/CD and ECS tasks
+3. **AWS SSM Parameter Store** - For service connection information and secrets
+
+### Required GitHub Secrets
+
+- `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` - AWS credentials
+- `AWS_REGION` - Target AWS region
+- `ECR_REPOSITORY` - ECR repository name
+- `ECS_CLUSTER` - ECS cluster name
+- `ECS_SERVICE` - Main application ECS service name
+- `ECS_OTEL_SERVICE` - OpenTelemetry collector ECS service name
+- `ECS_EXECUTION_ROLE_ARN` - ECS task execution role ARN
+- `VPC_ID` and `SUBNET_IDS` - VPC and subnet IDs
+- `DB_USERNAME`, `DB_PASSWORD`, `DB_NAME` - Database credentials
+- `ELASTICSEARCH_PASSWORD` - Elasticsearch password
+- `DJANGO_SECRET_KEY` - Django secret key
+- `ENVIRONMENT` - Deployment environment (dev, staging, prod)
+
+## Scaling and High Availability
+
+The architecture supports scaling and high availability through:
+
+1. **ECS Fargate** - Automatic scaling based on CPU/memory usage
+2. **RDS Multi-AZ** - Optional database high availability
+3. **ElastiCache Replication** - Optional Redis replication
+4. **Elasticsearch Multi-Node** - Optional Elasticsearch cluster scaling
+
+## Monitoring and Observability
+
+The deployment includes observability through:
+
+1. **CloudWatch Logs** - For all ECS services
+2. **OpenTelemetry Collector** - For metrics, traces, and logs collection
+3. **Health Checks** - For all services to ensure availability
+
+## Security Considerations
+
+The deployment implements security best practices:
+
+1. **IAM Least Privilege** - Task execution role with minimal permissions
+2. **Security Groups** - Restrict access between services
+3. **Secrets Management** - Sensitive data in SSM Parameter Store
+4. **Network Isolation** - Services in private subnets where appropriate
+5. **HTTPS** - For all external communication
+
+## Deployment Options
+
+### Option 1: Managed Services + ECS (Recommended)
+
+This approach uses AWS managed services where possible and ECS only for custom application containers:
+
+- **Backend Application**: ECS Fargate Task/Service
+- **Database**: Amazon RDS
+- **Elasticsearch**: Amazon Elasticsearch Service
+- **Redis**: Amazon ElastiCache
+- **Telemetry**: Amazon Elasticsearch Service + ECS for collectors/agents
+
+### Option 2: ECS for Everything
+
+This approach deploys everything as containers in ECS:
+
+- **Backend Application**: ECS Fargate Task/Service
+- **Database**: ECS Fargate Task with PostgreSQL container + EBS volume
+- **Elasticsearch**: ECS Fargate Task with Elasticsearch container + EBS volume
+- **Redis**: ECS Fargate Task with Redis container
+- **Telemetry**: ECS Fargate Tasks for all telemetry services
+
+### Recommended Approach
+
+We recommend **Option 1** for production workloads because:
+
+1. Managed services handle backups, high availability, and security patches
+2. Reduced operational overhead
+3. Better scalability and reliability
+4. Separation of concerns
+
+## Implementation Plan
+
+### 1. Create AWS Managed Services
+
+First, create the necessary managed services:
+
+- **RDS PostgreSQL Instance**
+- **ElastiCache Redis Cluster**
+- **Amazon Elasticsearch Service Domain(s)**
+
+### 2. Update Task Definition for Backend Application
+
+The task definition we've already created focuses on the backend application. It needs to be updated with connection information for the managed services.
+
+### 3. Create Task Definitions for Custom Services
+
+For services that don't have AWS managed equivalents (like otel-collector), create separate task definitions.
+
+### 4. Update CI/CD Pipeline
+
+Update the GitHub Actions workflow to:
+
+1. Deploy infrastructure changes if needed (using Terraform or CloudFormation)
+2. Deploy application containers to ECS
+
+## Example: RDS Configuration
+
+```bash
+# Create RDS instance
+aws rds create-db-instance \
+  --db-instance-identifier dataspace-db \
+  --db-instance-class db.t3.small \
+  --engine postgres \
+  --master-username ${DB_USERNAME} \
+  --master-user-password ${DB_PASSWORD} \
+  --allocated-storage 20
+```
+
+## Example: ElastiCache Configuration
+
+```bash
+# Create ElastiCache cluster
+aws elasticache create-cache-cluster \
+  --cache-cluster-id dataspace-redis \
+  --engine redis \
+  --cache-node-type cache.t3.small \
+  --num-cache-nodes 1
+```
+
+## Example: Amazon Elasticsearch Service
+
+```bash
+# Create Elasticsearch domain
+aws es create-elasticsearch-domain \
+  --domain-name dataspace-search \
+  --elasticsearch-version 7.10 \
+  --elasticsearch-cluster-config InstanceType=t3.small.elasticsearch,InstanceCount=1 \
+  --ebs-options EBSEnabled=true,VolumeType=gp2,VolumeSize=10
+```
+
+## Next Steps
+
+1. Create CloudFormation or Terraform templates for the infrastructure
+2. Update the ECS task definition with connection information for managed services
+3. Create separate task definitions for services that need to run in ECS
+4. Update the CI/CD pipeline to deploy all components
diff --git a/aws/otel-collector-task-definition.json b/aws/otel-collector-task-definition.json
new file mode 100644
index 00000000..cc5e1d37
--- /dev/null
+++ b/aws/otel-collector-task-definition.json
@@ -0,0 +1,81 @@
+{
+  "family": "${APP_NAME}-otel-collector",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}-otel-collector",
+      "image": "otel/opentelemetry-collector:latest",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 4317,
+          "hostPort": 4317,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 4318,
+          "hostPort": 4318,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 55680,
+          "hostPort": 55680,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 55681,
+          "hostPort": 55681,
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "ENVIRONMENT", "value": "${ENVIRONMENT}" }
+      ],
+      "mountPoints": [
+        {
+          "sourceVolume": "otel-config",
+          "containerPath": "/etc/otel-collector-config.yml",
+          "readOnly": true
+        }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}-otel-collector",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:13133/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "volumes": [
+    {
+      "name": "otel-config",
+      "dockerVolumeConfiguration": {
+        "scope": "task",
+        "driver": "local",
+        "labels": {
+          "app": "${APP_NAME}",
+          "component": "otel-collector"
+        }
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "256",
+  "memory": "512",
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "${APP_NAME}" },
+    { "key": "Component", "value": "otel-collector" },
+    { "key": "ManagedBy", "value": "GitHub-Actions" }
+  ]
+}
diff --git a/aws/redis-task-definition.json b/aws/redis-task-definition.json
new file mode 100644
index 00000000..2a823bd3
--- /dev/null
+++ b/aws/redis-task-definition.json
@@ -0,0 +1,53 @@
+{
+  "family": "${APP_NAME}-redis",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}-redis",
+      "image": "redis:alpine",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 6379,
+          "hostPort": 6379,
+          "protocol": "tcp"
+        }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}-redis",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "redis-cli ping | grep -q 'PONG'"],
+        "interval": 10,
+        "timeout": 5,
+        "retries": 5,
+        "startPeriod": 10
+      },
+      "mountPoints": [
+        {
+          "sourceVolume": "redis-data",
+          "containerPath": "/data"
+        }
+      ]
+    }
+  ],
+  "volumes": [
+    {
+      "name": "redis-data",
+      "efsVolumeConfiguration": {
+        "fileSystemId": "${EFS_ID}",
+        "rootDirectory": "/redis-data",
+        "transitEncryption": "ENABLED"
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "512",
+  "memory": "1024"
+}
diff --git a/aws/task-definition.json b/aws/task-definition.json
new file mode 100644
index 00000000..e8b35399
--- /dev/null
+++ b/aws/task-definition.json
@@ -0,0 +1,61 @@
+{
+  "family": "${APP_NAME}",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}",
+      "image": "${ECR_REPOSITORY_URI}:${IMAGE_TAG}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": "${APP_PORT}",
+          "hostPort": "${APP_PORT}",
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "DB_ENGINE", "value": "${DB_ENGINE}" },
+        { "name": "DB_PORT", "value": "${DB_PORT}" },
+        { "name": "DEBUG", "value": "${DEBUG_MODE}" },
+        { "name": "TELEMETRY_URL", "value": "${TELEMETRY_URL}" },
+        { "name": "REDIS_PORT", "value": "6379" }
+      ],
+      "secrets": [
+        { "name": "DB_HOST", "valueFrom": "${SSM_PATH_PREFIX}/DB_HOST" },
+        { "name": "DB_NAME", "valueFrom": "${SSM_PATH_PREFIX}/DB_NAME" },
+        { "name": "DB_USER", "valueFrom": "${SSM_PATH_PREFIX}/DB_USER" },
+        { "name": "DB_PASSWORD", "valueFrom": "${SSM_PATH_PREFIX}/DB_PASSWORD" },
+        { "name": "SECRET_KEY", "valueFrom": "${SSM_PATH_PREFIX}/SECRET_KEY" },
+        { "name": "ELASTICSEARCH_INDEX", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_INDEX" },
+        { "name": "ELASTICSEARCH_USERNAME", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_USERNAME" },
+        { "name": "ELASTICSEARCH_PASS", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_PASS" },
+        { "name": "URL_WHITELIST", "valueFrom": "${SSM_PATH_PREFIX}/URL_WHITELIST" },
+        { "name": "REDIS_HOST", "valueFrom": "${SSM_PATH_PREFIX}/REDIS_HOST" }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${CPU_UNITS}",
+  "memory": "${MEMORY_UNITS}",
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "${APP_NAME}" },
+    { "key": "ManagedBy", "value": "GitHub-Actions" }
+  ]
+}

From bb1e96bcaa5e8763d6e2f8873dd5c07a5d390ae8 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 13:49:03 +0530
Subject: [PATCH 14/38] always retain existing description

---
 api/utils/data_indexing.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/api/utils/data_indexing.py b/api/utils/data_indexing.py
index 3f2af763..679a2c86 100644
--- a/api/utils/data_indexing.py
+++ b/api/utils/data_indexing.py
@@ -163,12 +163,7 @@ def index_resource_data(resource: Resource) -> Optional[ResourceDataTable]:
                 if col in existing_schemas:
                     existing_description = existing_schemas[col]["description"]
                     # Check for None and non-auto-generated descriptions
-                    if (
-                        existing_description is not None
-                        and not existing_description.startswith(
-                            "Description of column"
-                        )
-                    ):
+                    if existing_description is not None:
                         description = existing_description
                         logger.info(
                             f"Preserved custom description for column {col}"

From 2b5fb9e60d43d2d6281a6eea3adc41d35cf194d7 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 14:01:35 +0530
Subject: [PATCH 15/38] handle updating resource schema when updating file

---
 api/schema/resource_schema.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/api/schema/resource_schema.py b/api/schema/resource_schema.py
index 194842ef..b502ace2 100644
--- a/api/schema/resource_schema.py
+++ b/api/schema/resource_schema.py
@@ -159,6 +159,11 @@ def _create_file_resource_schema(resource: Resource) -> None:
     return


+def _reset_file_resource_schema(resource: Resource) -> None:
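+    # Drop any stale schema rows, then rebuild them by re-indexing the file.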
+    ResourceSchema.objects.filter(resource=resource).delete()
+    data_table = index_resource_data(resource)
+
+
 def _update_file_resource_schema(
     resource: Resource, updated_schema: List[SchemaUpdate]
 ) -> None:
@@ -262,6 +267,7 @@ def create_file_resources(
                 file=file, size=file.size, resource=resource
             )
             _validate_file_details_and_update_format(resource)
+            _create_file_resource_schema(resource)
             resources.append(TypeResource.from_django(resource))

         return resources
@@ -351,6 +357,8 @@ def update_file_resource(
                 size=file_resource_input.file.size,
                 resource=resource,
             )
+            _validate_file_details_and_update_format(resource)
+            _create_file_resource_schema(resource)

         if file_resource_input.preview_details:
             _update_resource_preview_details(file_resource_input, resource)

From 37c6006cc945e3495ffadb3e675321d382a50803 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 17:04:57 +0530
Subject: [PATCH 16/38] config changes to infra

---
 aws/cloudformation/dataspace-infrastructure.yml | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml
index 2bece03d..70e8b9b2 100644
--- a/aws/cloudformation/dataspace-infrastructure.yml
+++ b/aws/cloudformation/dataspace-infrastructure.yml
@@ -116,10 +116,10 @@ Resources:
   Database:
     Type: AWS::RDS::DBInstance
     Properties:
-      AllocatedStorage: 20
+      AllocatedStorage: 30
       DBInstanceClass: !Ref DBInstanceClass
       Engine: postgres
-      EngineVersion: '14.4'
+      EngineVersion: '17.4'
       MasterUsername: !Ref DBUsername
       MasterUserPassword: !Ref DBPassword
       DBName: !Ref DBName
@@ -261,9 +261,10 @@ Resources:
     Type: AWS::SSM::Parameter
     Properties:
       Name: /dataspace/DB_PASSWORD
-      Type: SecureString
+      Type: String
       Value: !Ref DBPassword
       Description: Database password
+      Tier: Standard

   ElasticsearchIndexParameter:
     Type: AWS::SSM::Parameter
@@ -285,9 +286,10 @@ Resources:
     Type: AWS::SSM::Parameter
     Properties:
       Name: /dataspace/ELASTICSEARCH_PASS
-      Type: SecureString
+      Type: String
       Value: !Ref ElasticsearchPassword
       Description: Elasticsearch password
+      Tier: Standard

   RedisHostParameter:
     Type: AWS::SSM::Parameter
@@ -301,9 +303,10 @@ Resources:
     Type: AWS::SSM::Parameter
     Properties:
       Name: /dataspace/SECRET_KEY
-      Type: SecureString
+      Type: String
       Value: !Ref DjangoSecretKey
       Description: Django secret key
+      Tier: Standard

   URLWhitelistParameter:
     Type: AWS::SSM::Parameter

From 674948cbf200bddcfec7fcad39e63ba2d4953138 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 17:05:21 +0530
Subject: [PATCH 17/38] add templates for otel and main ecs services

---
 ...el-collector-task-definition.json.template | 67 +++++++++++++++++++
 aws/task-definition.json.template             | 62 +++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 aws/otel-collector-task-definition.json.template
 create mode 100644 aws/task-definition.json.template

diff --git a/aws/otel-collector-task-definition.json.template b/aws/otel-collector-task-definition.json.template
new file mode 100644
index 00000000..966cdee4
--- /dev/null
+++ b/aws/otel-collector-task-definition.json.template
@@ -0,0 +1,67 @@
+{
+  "family": "dataspace-otel-collector",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${OTEL_CPU_UNITS}",
+  "memory": "${OTEL_MEMORY_UNITS}",
+  "containerDefinitions": [
+    {
+      "name": "otel-collector",
+      "image": "otel/opentelemetry-collector:${OTEL_VERSION}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 4317,
+          "hostPort": 4317,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 4318,
+          "hostPort": 4318,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 8888,
+          "hostPort": 8888,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 8889,
+          "hostPort": 8889,
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "OTEL_RESOURCE_ATTRIBUTES", "value": "service.name=dataspace-telemetry,deployment.environment=${ENVIRONMENT}" },
+        { "name": "OTEL_CONFIG", "value": "receivers:\n  otlp:\n    protocols:\n      grpc:\n      http:\n  prometheus:\n    config:\n      scrape_configs:\n        - job_name: 'otel-collector'\n          scrape_interval: 10s\n          static_configs:\n            - targets: ['0.0.0.0:8888']\nexporters:\n  logging:\n    verbosity: detailed\n  prometheus:\n    endpoint: 0.0.0.0:8889\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      exporters: [logging]\n    metrics:\n      receivers: [otlp, prometheus]\n      exporters: [prometheus, logging]" },
+        { "name": "OTEL_CONFIG_PATH", "value": "/etc/otel/config.yaml" }
+      ],
+      "command": [
+        "--config=env:OTEL_CONFIG"
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/dataspace-otel",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs",
+          "awslogs-create-group": "true"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:8888/health || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "volumes": [],
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "dataspace-telemetry" },
+    { "key": "ManagedBy", "value": "CloudFormation" }
+  ]
+}
diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template
new file mode 100644
index 00000000..fdd73659
--- /dev/null
+++ b/aws/task-definition.json.template
@@ -0,0 +1,62 @@
+{
+  "family": "dataspace",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${CPU_UNITS}",
+  "memory": "${MEMORY_UNITS}",
+  "containerDefinitions": [
+    {
+      "name": "dataspace",
+      "image": "${ECR_REPOSITORY}:${IMAGE_TAG}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": ${APP_PORT},
+          "hostPort": ${APP_PORT},
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "DEBUG", "value": "${DEBUG_MODE}" },
+        { "name": "APP_PORT", "value": "${APP_PORT}" },
+        { "name": "DB_ENGINE", "value": "${DB_ENGINE}" },
+        { "name": "DB_PORT", "value": "${DB_PORT}" },
+        { "name": "TELEMETRY_URL", "value": "${TELEMETRY_URL}" }
+      ],
+      "secrets": [
+        { "name": "DB_HOST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_HOST" },
+        { "name": "DB_NAME", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_NAME" },
+        { "name": "DB_USER", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_USER" },
+        { "name": "DB_PASSWORD", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_PASSWORD" },
+        { "name": "ELASTICSEARCH_INDEX", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_INDEX" },
+        { "name": "ELASTICSEARCH_USERNAME", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_USERNAME" },
+        { "name": "ELASTICSEARCH_PASS", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_PASS" },
+        { "name": "REDIS_HOST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/REDIS_HOST" },
+        { "name": "SECRET_KEY", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/SECRET_KEY" },
+        { "name": "URL_WHITELIST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/URL_WHITELIST" }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/dataspace",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs",
+          "awslogs-create-group": "true"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "dataspace" },
+    { "key": "ManagedBy", "value": "CloudFormation" }
+  ]
+}

From 87188f2e331cc89e3ab1edb5213eba670c5d0e2e Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 17:05:49 +0530
Subject: [PATCH 18/38] ignore aws env files

---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 434ee87c..707db789 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,3 +155,7 @@ resources/
 .env
 api/migrations/*
 authorization/migrations/*
+
+
+# AWS files
+aws/.env.*

From ac69774e33de13604f7909e2b292fa68afac2207 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 18:40:25 +0530
Subject: [PATCH 19/38] use existing task definition to deploy

---
 .github/workflows/deploy-to-ecs.yml | 35 ++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml
index 8becd02b..64424b94 100644
--- a/.github/workflows/deploy-to-ecs.yml
+++ b/.github/workflows/deploy-to-ecs.yml
@@ -89,19 +89,26 @@ jobs:
           docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
           echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT

-      - name: Process main task definition template
-        id: task-def-app
-        env:
-          ECR_REPOSITORY_URI: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}
-          IMAGE_TAG: ${{ github.sha }}
-        run: |
-          envsubst < aws/task-definition.json > aws/task-definition-processed.json
-          cat aws/task-definition-processed.json
+      - name: Download current task definition
+        id: download-taskdef
+        run: |
+          aws ecs describe-task-definition \
+            --task-definition dataspace \
+            --query taskDefinition > aws/current-task-definition.json
+          cat aws/current-task-definition.json
+
aws/otel-collector-task-definition-processed.json
-          cat aws/otel-collector-task-definition-processed.json
+          aws ecs describe-task-definition \
+            --task-definition dataspace-otel-collector \
+            --query taskDefinition > aws/current-otel-task-definition.json
+          cat aws/current-otel-task-definition.json

       - name: Deploy OpenTelemetry ECS task definition
         uses: aws-actions/amazon-ecs-deploy-task-definition@v1
         with:
-          task-definition: aws/otel-collector-task-definition-processed.json
+          task-definition: aws/current-otel-task-definition.json
           service: ${{ secrets.ECS_OTEL_SERVICE }}
           cluster: ${{ env.ECS_CLUSTER }}
           wait-for-service-stability: true

From ccaae5a4971ca80620c62ce555d484740b55cd9e Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 15:13:07 +0530
Subject: [PATCH 20/38] add efs to task definition and fix dockerfile to have an execution command

---
 .github/workflows/deploy-to-ecs.yml | 12 ++---
 Dockerfile | 14 ++++--
 .../dataspace-infrastructure.yml | 45 ++++++++++++++++++-
 aws/task-definition.json.template | 23 +++++++++-
 4 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml
index 64424b94..4fe14158 100644
--- a/.github/workflows/deploy-to-ecs.yml
+++ b/.github/workflows/deploy-to-ecs.yml
@@ -89,13 +89,13 @@ jobs:
           docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
           echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT

-      - name: Download current task definition
-        id: download-taskdef
+      - name: Download task definition and get EFS ID
         run: |
-          aws ecs describe-task-definition \
-            --task-definition dataspace \
-            --query taskDefinition > aws/current-task-definition.json
-          cat aws/current-task-definition.json
+          aws ecs describe-task-definition --task-definition dataspace --query taskDefinition > aws/current-task-definition.json
+          aws ecs describe-task-definition --task-definition dataspace-otel-collector --query taskDefinition > aws/current-otel-task-definition.json
+          # Get the EFS ID from CloudFormation export
+          EFS_ID=$(aws cloudformation list-exports --query "Exports[?Name=='dataspace-${{ env.ENVIRONMENT }}-MigrationsFileSystemId'].Value" --output text)
+          echo "EFS_ID=$EFS_ID" >> $GITHUB_ENV

       - name: Update container image only
         id: task-def-app

diff --git a/Dockerfile b/Dockerfile
index a16a322a..30967ee2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,10 +12,18 @@ RUN echo 'deb http://archive.debian.org/debian stretch main contrib non-free' >>
 WORKDIR /code
 COPY . 
/code/ -RUN pip install psycopg2-binary +RUN pip install psycopg2-binary uvicorn RUN pip install -r requirements.txt -#RUN python manage.py migrate + +# Create healthcheck script +RUN echo '#!/bin/bash\nset -e\npython -c "import sys; import django; django.setup(); sys.exit(0)"' > /code/healthcheck.sh \ + && chmod +x /code/healthcheck.sh EXPOSE 8000 -#CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"] \ No newline at end of file + +# Make entrypoint script executable +RUN chmod +x /code/docker-entrypoint.sh + +ENTRYPOINT ["/code/docker-entrypoint.sh"] +CMD ["uvicorn", "DataSpace.asgi:application", "--host", "0.0.0.0", "--port", "8000"] diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml index 70e8b9b2..cf3a4e32 100644 --- a/aws/cloudformation/dataspace-infrastructure.yml +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -316,6 +316,39 @@ Resources: Value: !Sub 'https://dataspace-${Environment}.yourdomain.com' Description: URL whitelist + MigrationsFileSystem: + Type: AWS::EFS::FileSystem + Properties: + PerformanceMode: generalPurpose + Encrypted: true + FileSystemTags: + - Key: Name + Value: {"Fn::Sub": "${AWS::StackName}-migrations"} + + MigrationsAccessPoint: + Type: AWS::EFS::AccessPoint + Properties: + FileSystemId: {"Ref": "MigrationsFileSystem"} + PosixUser: + Uid: "1000" + Gid: "1000" + RootDirectory: + Path: "/migrations" + CreationInfo: + OwnerUid: "1000" + OwnerGid: "1000" + Permissions: "755" + + MigrationsFileSystemMountTarget: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: + Ref: MigrationsFileSystem + SubnetId: + "Fn::Select": [0, {"Ref": "SubnetIds"}] + SecurityGroups: + - {"Ref": "ECSSecurityGroup"} + Outputs: ClusterName: Description: ECS Cluster Name @@ -331,8 +364,16 @@ Outputs: RedisEndpoint: Description: Redis endpoint - Value: !GetAtt RedisCluster.RedisEndpoint.Address + Value: {"Fn::GetAtt": ["RedisCluster", "RedisEndpoint.Address"]} TaskExecutionRoleArn: Description: ECS Task Execution Role ARN - Value: !GetAtt ECSTaskExecutionRole.Arn + Value: {"Fn::GetAtt": ["ECSTaskExecutionRole", "Arn"]} + Export: + Name: {"Fn::Sub": "${AWS::StackName}-ECSTaskExecutionRoleArn"} + + MigrationsFileSystemId: + Description: EFS File System ID for migrations + Value: {"Ref": "MigrationsFileSystem"} + Export: + Name: {"Fn::Sub": "${AWS::StackName}-MigrationsFileSystemId"} diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template index fdd73659..0c4bc093 100644 --- a/aws/task-definition.json.template +++ b/aws/task-definition.json.template @@ -1,10 +1,24 @@ { "family": "dataspace", "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}", + "taskRoleArn": "${ECS_EXECUTION_ROLE_ARN}", "networkMode": "awsvpc", "requiresCompatibilities": ["FARGATE"], "cpu": "${CPU_UNITS}", "memory": "${MEMORY_UNITS}", + "volumes": [ + { + "name": "migrations-volume", + "efsVolumeConfiguration": { + "fileSystemId": "${EFS_ID}", + "rootDirectory": "/migrations", + "transitEncryption": "ENABLED", + "authorizationConfig": { + "iam": "ENABLED" + } + } + } + ], "containerDefinitions": [ { "name": "dataspace", @@ -17,6 +31,13 @@ "protocol": "tcp" } ], + "mountPoints": [ + { + "sourceVolume": "migrations-volume", + "containerPath": "/code/api/migrations", + "readOnly": false + } + ], "environment": [ { "name": "DEBUG", "value": "${DEBUG_MODE}" }, { "name": "APP_PORT", "value": "${APP_PORT}" }, @@ -46,7 +67,7 @@ } }, "healthCheck": { - "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 
1"], + "command": ["CMD-SHELL", "/code/healthcheck.sh && curl -f http://localhost:${APP_PORT}/health/ || exit 1"], "interval": 30, "timeout": 5, "retries": 3, From 74ee3d87d840b765875dd2feb5928099ebc2dab1 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 15:26:56 +0530 Subject: [PATCH 21/38] add docker entrypoint --- docker-entrypoint.sh | 64 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 docker-entrypoint.sh diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 00000000..88c38884 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,64 @@ +#!/bin/bash +set -e + +# Wait for database to be ready +echo "Waiting for database..." +python << END +import sys +import time +import psycopg2 +import os + +# Get database connection details from environment variables +host = os.environ.get("DB_HOST", "localhost") +port = os.environ.get("DB_PORT", "5432") +dbname = os.environ.get("DB_NAME", "postgres") +user = os.environ.get("DB_USER", "postgres") +password = os.environ.get("DB_PASSWORD", "postgres") + +# Try to connect to the database +start_time = time.time() +timeout = 30 +while True: + try: + conn = psycopg2.connect( + host=host, + port=port, + dbname=dbname, + user=user, + password=password + ) + conn.close() + print("Database is ready!") + break + except psycopg2.OperationalError as e: + if time.time() - start_time > timeout: + print(f"Could not connect to database after {timeout} seconds: {e}") + sys.exit(1) + print("Waiting for database to be ready...") + time.sleep(2) +END + +# Run makemigrations first to ensure migration files are created +echo "Running makemigrations..." +python manage.py makemigrations --noinput + +# Run migrations +echo "Running migrations..." +python manage.py migrate --noinput + +# Create superuser if needed +if [ "$DJANGO_SUPERUSER_USERNAME" ] && [ "$DJANGO_SUPERUSER_PASSWORD" ] && [ "$DJANGO_SUPERUSER_EMAIL" ]; then + echo "Creating superuser..." + python manage.py createsuperuser --noinput +fi + +# Collect static files +if [ "$COLLECT_STATIC" = "true" ]; then + echo "Collecting static files..." + python manage.py collectstatic --noinput +fi + +# Start server +echo "Starting server..." 
+exec "$@" From 316f472375c033381ed763110b507174bdc0f344 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 15:42:53 +0530 Subject: [PATCH 22/38] add validations for update dataset --- api/schema/dataset_schema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py index 47b9c874..9410b5e5 100644 --- a/api/schema/dataset_schema.py +++ b/api/schema/dataset_schema.py @@ -636,7 +636,10 @@ def update_dataset( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist as e: raise ValueError(f"Dataset with ID {dataset_id} does not exist.") - + if dataset.status != DatasetStatus.DRAFT.value: + raise ValueError(f"Dataset with ID {dataset_id} is not in draft status.") + if update_dataset_input.title == "": + raise ValueError("Title cannot be empty.") if update_dataset_input.title: dataset.title = update_dataset_input.title if update_dataset_input.description: From b3a05f778ec28cb9a195ff5d1e13fe57433ff638 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 15:52:52 +0530 Subject: [PATCH 23/38] add draft validation and expand update_usecase mutation --- api/schema/usecase_schema.py | 59 ++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 6abd010c..eba19833 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -64,6 +64,7 @@ class UseCaseInputPartial: """Input type for use case updates.""" id: str + title: auto slug: auto summary: auto @@ -241,7 +242,7 @@ class Mutation: """Mutations for use cases.""" create_use_case: TypeUseCase = mutations.create(UseCaseInput) - update_use_case: TypeUseCase = mutations.update(UseCaseInputPartial, key_attr="id") + # update_use_case: TypeUseCase = mutations.update(UseCaseInputPartial, key_attr="id") @strawberry_django.mutation( handle_django_errors=True, @@ -326,12 +327,41 @@ def add_update_usecase_metadata( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {usecase_id} does not exist.") + if usecase.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") + if update_metadata_input.tags is not None: _update_usecase_tags(usecase, update_metadata_input.tags) _add_update_usecase_metadata(usecase, metadata_input) _update_usecase_sectors(usecase, update_metadata_input.sectors) return TypeUseCase.from_django(usecase) + @strawberry_django.mutation(handle_django_errors=True) + @trace_resolver( + name="update_use_case", + attributes={"component": "usecase", "operation": "mutation"}, + ) + def update_use_case( + self, info: Info, use_case_input_partial: UseCaseInputPartial + ) -> TypeUseCase: + usecase_id = use_case_input_partial.id + try: + usecase = UseCase.objects.get(id=usecase_id) + except UseCase.DoesNotExist: + raise ValueError(f"UseCase with ID {usecase_id} does not exist.") + + if usecase.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") + + if use_case_input_partial.title == "": + raise ValueError("Title cannot be empty.") + if use_case_input_partial.title is not None: + usecase.title = use_case_input_partial.title + if use_case_input_partial.summary is not None: + usecase.summary = use_case_input_partial.summary + usecase.save() + return TypeUseCase.from_django(usecase) + @strawberry_django.mutation( handle_django_errors=False, extensions=[ @@ -371,6 +401,9 @@ def add_dataset_to_use_case( except 
UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.datasets.add(dataset) use_case.save() return TypeUseCase.from_django(use_case) @@ -384,12 +417,13 @@ def remove_dataset_from_use_case( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist: raise ValueError(f"Dataset with ID {dataset_id} does not exist.") - try: use_case = UseCase.objects.get(id=use_case_id) except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") use_case.datasets.remove(dataset) use_case.save() return TypeUseCase.from_django(use_case) @@ -409,6 +443,9 @@ def update_usecase_datasets( except UseCase.DoesNotExist: raise ValueError(f"Use Case with ID {use_case_id} doesn't exist") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.datasets.set(datasets) use_case.save() return TypeUseCase.from_django(use_case) @@ -487,6 +524,9 @@ def add_contributor_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.add(user) use_case.save() return TypeUseCase.from_django(use_case) @@ -511,6 +551,9 @@ def remove_contributor_from_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.remove(user) use_case.save() return TypeUseCase.from_django(use_case) @@ -545,6 +588,9 @@ def update_usecase_contributors( except UseCase.DoesNotExist: raise ValueError(f"Use Case with ID {use_case_id} doesn't exist") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.set(users) use_case.save() return TypeUseCase.from_django(use_case) @@ -569,6 +615,9 @@ def add_supporting_organization_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Create or get the relationship relationship, created = UseCaseOrganizationRelationship.objects.get_or_create( usecase=use_case, @@ -621,6 +670,9 @@ def add_partner_organization_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Create or get the relationship relationship, created = UseCaseOrganizationRelationship.objects.get_or_create( usecase=use_case, @@ -691,6 +743,9 @@ def update_usecase_organization_relationships( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Clear existing relationships UseCaseOrganizationRelationship.objects.filter(usecase=use_case).delete() From 
e3b55ed139a9100e1a8edcb57ea854d94b19a3d0 Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 15:56:15 +0530
Subject: [PATCH 24/38] use data for update_usecase mutation

---
 api/schema/usecase_schema.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py
index eba19833..e4ce240f 100644
--- a/api/schema/usecase_schema.py
+++ b/api/schema/usecase_schema.py
@@ -341,10 +341,8 @@ def add_update_usecase_metadata(
         name="update_use_case",
         attributes={"component": "usecase", "operation": "mutation"},
     )
-    def update_use_case(
-        self, info: Info, use_case_input_partial: UseCaseInputPartial
-    ) -> TypeUseCase:
-        usecase_id = use_case_input_partial.id
+    def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase:
+        usecase_id = data.id
         try:
             usecase = UseCase.objects.get(id=usecase_id)
         except UseCase.DoesNotExist:
@@ -353,12 +351,12 @@ def update_use_case(
         if usecase.status != UseCaseStatus.DRAFT:
             raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.")

-        if use_case_input_partial.title == "":
+        if data.title == "":
             raise ValueError("Title cannot be empty.")
-        if use_case_input_partial.title is not None:
-            usecase.title = use_case_input_partial.title
-        if use_case_input_partial.summary is not None:
-            usecase.summary = use_case_input_partial.summary
+        if data.title is not None:
+            usecase.title = data.title
+        if data.summary is not None:
+            usecase.summary = data.summary
         usecase.save()
         return TypeUseCase.from_django(usecase)

From 7dc1c281c003d08eed5ea8850189785d2d1174b8 Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 15:58:56 +0530
Subject: [PATCH 25/38] don't handle django errors for update_use_case

---
 api/schema/usecase_schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py
index e4ce240f..a6f9caae 100644
--- a/api/schema/usecase_schema.py
+++ b/api/schema/usecase_schema.py
@@ -336,7 +336,7 @@ def add_update_usecase_metadata(
         _update_usecase_sectors(usecase, update_metadata_input.sectors)
         return TypeUseCase.from_django(usecase)

-    @strawberry_django.mutation(handle_django_errors=True)
+    @strawberry_django.mutation(handle_django_errors=False)
     @trace_resolver(
         name="update_use_case",
         attributes={"component": "usecase", "operation": "mutation"},

From e864fadcdea0f26752db17077d0d12cce2f21e54 Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 16:01:30 +0530
Subject: [PATCH 26/38] strip text inputs

---
 api/schema/dataset_schema.py | 6 +++---
 api/schema/usecase_schema.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index 9410b5e5..84c4d73a 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -638,12 +638,12 @@ def update_dataset(
             raise ValueError(f"Dataset with ID {dataset_id} does not exist.")
         if dataset.status != DatasetStatus.DRAFT.value:
             raise ValueError(f"Dataset with ID {dataset_id} is not in draft status.")
-        if update_dataset_input.title == "":
+        if update_dataset_input.title.strip() == "":
             raise ValueError("Title cannot be empty.")
         if update_dataset_input.title:
-            dataset.title = update_dataset_input.title
+            dataset.title = update_dataset_input.title.strip()
         if update_dataset_input.description:
-            dataset.description = update_dataset_input.description
+            dataset.description = update_dataset_input.description.strip()
         if update_dataset_input.access_type: 
dataset.access_type = update_dataset_input.access_type if update_dataset_input.license: diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index a6f9caae..291b0192 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -351,12 +351,12 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: if usecase.status != UseCaseStatus.DRAFT: raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") - if data.title == "": + if data.title.strip() == "": raise ValueError("Title cannot be empty.") if data.title is not None: - usecase.title = data.title + usecase.title = data.title.strip() if data.summary is not None: - usecase.summary = data.summary + usecase.summary = data.summary.strip() usecase.save() return TypeUseCase.from_django(usecase) From 139f1cbe370dbbd8b4cdcf75c5f6f7068364ed32 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 16:08:30 +0530 Subject: [PATCH 27/38] add platform_url to usecase --- api/models/UseCase.py | 1 + api/schema/usecase_schema.py | 22 +++++++++++++++++++++- api/types/type_usecase.py | 3 +++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/api/models/UseCase.py b/api/models/UseCase.py index f085e70b..cd4febb5 100644 --- a/api/models/UseCase.py +++ b/api/models/UseCase.py @@ -54,6 +54,7 @@ class UseCase(models.Model): ) started_on = models.DateField(blank=True, null=True) completed_on = models.DateField(blank=True, null=True) + platform_url = models.URLField(blank=True, null=True) def save(self, *args: Any, **kwargs: Any) -> None: if self.title and not self.slug: diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 291b0192..7ddacd6f 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -65,8 +65,14 @@ class UseCaseInputPartial: id: str title: auto - slug: auto summary: auto + platform_url: auto + tags: auto + sectors: auto + started_on: auto + completed_on: auto + logo: auto + running_status: auto @strawberry.type(name="Query") @@ -357,6 +363,20 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: usecase.title = data.title.strip() if data.summary is not None: usecase.summary = data.summary.strip() + if data.platform_url is not None: + usecase.platform_url = data.platform_url.strip() + if data.tags is not None: + _update_usecase_tags(usecase, data.tags) + if data.sectors is not None: + _update_usecase_sectors(usecase, data.sectors) + if data.started_on is not None: + usecase.started_on = data.started_on + if data.completed_on is not None: + usecase.completed_on = data.completed_on + if data.running_status is not None: + usecase.running_status = data.running_status + if data.logo is not None: + usecase.logo = data.logo usecase.save() return TypeUseCase.from_django(usecase) diff --git a/api/types/type_usecase.py b/api/types/type_usecase.py index 32d3c924..5242da21 100644 --- a/api/types/type_usecase.py +++ b/api/types/type_usecase.py @@ -58,6 +58,9 @@ class TypeUseCase(BaseType): organization: Optional[TypeOrganization] = strawberry.field( description="Organization associated with this use case" ) + platform_url: Optional[str] = strawberry.field( + description="URL of the platform where this use case is published" + ) @strawberry.field( description="Check if this use case is created by an individual user." 
From c7afb27ad18e03966deeb1c572bd4d1b5bddb240 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 16:45:23 +0530 Subject: [PATCH 28/38] disable transitEncryption --- aws/cloudformation/dataspace-infrastructure.yml | 9 ++++----- aws/task-definition.json.template | 5 +---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml index cf3a4e32..351445cf 100644 --- a/aws/cloudformation/dataspace-infrastructure.yml +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -339,15 +339,14 @@ Resources: OwnerGid: "1000" Permissions: "755" + # Create EFS mount targets in the first subnet MigrationsFileSystemMountTarget: Type: AWS::EFS::MountTarget Properties: - FileSystemId: - Ref: MigrationsFileSystem - SubnetId: - "Fn::Select": [0, {"Ref": "SubnetIds"}] + FileSystemId: !Ref MigrationsFileSystem + SubnetId: !Select [0, !Ref SubnetIds] SecurityGroups: - - {"Ref": "ECSSecurityGroup"} + - !Ref ECSSecurityGroup Outputs: ClusterName: diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template index 0c4bc093..69fb51c3 100644 --- a/aws/task-definition.json.template +++ b/aws/task-definition.json.template @@ -12,10 +12,7 @@ "efsVolumeConfiguration": { "fileSystemId": "${EFS_ID}", "rootDirectory": "/migrations", - "transitEncryption": "ENABLED", - "authorizationConfig": { - "iam": "ENABLED" - } + "transitEncryption": "DISABLED" } } ], From 4596bfbc29403d4b20295ed57d03316dd17913d2 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 17:34:41 +0530 Subject: [PATCH 29/38] make sure efs volume is mounted --- aws/cloudformation/dataspace-infrastructure.yml | 4 ++++ docker-entrypoint.sh | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml index 351445cf..08a42562 100644 --- a/aws/cloudformation/dataspace-infrastructure.yml +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -374,5 +374,9 @@ Outputs: MigrationsFileSystemId: Description: EFS File System ID for migrations Value: {"Ref": "MigrationsFileSystem"} + + MigrationsAccessPointId: + Description: EFS Access Point ID for migrations + Value: {"Ref": "MigrationsAccessPoint"} Export: Name: {"Fn::Sub": "${AWS::StackName}-MigrationsFileSystemId"} diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 88c38884..05d482cb 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -39,6 +39,12 @@ while True: time.sleep(2) END +# Ensure migrations directory exists with proper permissions +echo "Ensuring migrations directory exists..." +mkdir -p /code/api/migrations +chmod -R 777 /code/api/migrations +touch /code/api/migrations/__init__.py + # Run makemigrations first to ensure migration files are created echo "Running makemigrations..." 
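# (Context: api/migrations/* is gitignored in this repo, so migration files
# exist only at runtime; the task definition mounts a volume at
# /code/api/migrations so the generated files live outside the container
# layer before "migrate" runs below.)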
python manage.py makemigrations --noinput From b0f9aeacbfbb656b7f61007cc4f583c658b4ffb8 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 17:52:19 +0530 Subject: [PATCH 30/38] use docker volume instead of efs --- aws/task-definition.json.template | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template index 69fb51c3..544bff8c 100644 --- a/aws/task-definition.json.template +++ b/aws/task-definition.json.template @@ -8,12 +8,7 @@ "memory": "${MEMORY_UNITS}", "volumes": [ { - "name": "migrations-volume", - "efsVolumeConfiguration": { - "fileSystemId": "${EFS_ID}", - "rootDirectory": "/migrations", - "transitEncryption": "DISABLED" - } + "name": "migrations-volume" } ], "containerDefinitions": [ From 31b01da61b06efcf335bdb854c799a381b4d53fb Mon Sep 17 00:00:00 2001 From: dc Date: Fri, 27 Jun 2025 13:52:12 +0530 Subject: [PATCH 31/38] remove tags and sector update from update_use_case --- api/schema/usecase_schema.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 7ddacd6f..0e65c848 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -365,10 +365,6 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: usecase.summary = data.summary.strip() if data.platform_url is not None: usecase.platform_url = data.platform_url.strip() - if data.tags is not None: - _update_usecase_tags(usecase, data.tags) - if data.sectors is not None: - _update_usecase_sectors(usecase, data.sectors) if data.started_on is not None: usecase.started_on = data.started_on if data.completed_on is not None: From dfa3e0d6301bc6b6c52757b8efbf19aa85899516 Mon Sep 17 00:00:00 2001 From: dc Date: Fri, 27 Jun 2025 16:33:10 +0530 Subject: [PATCH 32/38] use status value for update --- api/schema/usecase_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 0e65c848..0cbdfd81 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -370,7 +370,7 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: if data.completed_on is not None: usecase.completed_on = data.completed_on if data.running_status is not None: - usecase.running_status = data.running_status + usecase.running_status = data.running_status.value if data.logo is not None: usecase.logo = data.logo usecase.save() From 826dd14bad8127e190eb9319c1fbcfb26b836e4e Mon Sep 17 00:00:00 2001 From: dc Date: Mon, 30 Jun 2025 13:09:36 +0530 Subject: [PATCH 33/38] add checks for title on usecase update --- api/schema/usecase_schema.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 0cbdfd81..d56850a7 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -64,15 +64,15 @@ class UseCaseInputPartial: """Input type for use case updates.""" id: str - title: auto - summary: auto - platform_url: auto - tags: auto - sectors: auto - started_on: auto - completed_on: auto logo: auto running_status: auto + title: Optional[str] = None + summary: Optional[str] = None + platform_url: Optional[str] = None + tags: Optional[List[str]] = None + sectors: Optional[List[uuid.UUID]] = None + started_on: Optional[datetime.date] = None + completed_on: Optional[datetime.date] = None @strawberry.type(name="Query") @@ -357,9 +357,9 @@ def 
update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: if usecase.status != UseCaseStatus.DRAFT: raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") - if data.title.strip() == "": - raise ValueError("Title cannot be empty.") if data.title is not None: + if data.title.strip() == "": + raise ValueError("Title cannot be empty.") usecase.title = data.title.strip() if data.summary is not None: usecase.summary = data.summary.strip() From 07c92a890911852b0e41b24da2f4be43c4dcba00 Mon Sep 17 00:00:00 2001 From: dc Date: Mon, 30 Jun 2025 13:09:36 +0530 Subject: [PATCH 34/38] add checks for title on usecase update --- api/schema/usecase_schema.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index d56850a7..938e38f8 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -10,6 +10,7 @@ import strawberry_django from django.db import models from strawberry import auto +from strawberry.file_uploads import Upload from strawberry.types import Info from strawberry_django.mutations import mutations from strawberry_django.pagination import OffsetPaginationInput @@ -59,13 +60,16 @@ class UpdateUseCaseMetadataInput: sectors: List[uuid.UUID] +use_case_running_status = strawberry.enum(UseCaseStatus) # type: ignore + + @strawberry_django.partial(UseCase, fields="__all__", exclude=["datasets"]) class UseCaseInputPartial: """Input type for use case updates.""" id: str - logo: auto - running_status: auto + logo: Optional[Upload] = strawberry.field(default=None) + running_status: Optional[use_case_running_status] = UseCaseStatus.DRAFT title: Optional[str] = None summary: Optional[str] = None platform_url: Optional[str] = None @@ -367,11 +371,14 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: usecase.platform_url = data.platform_url.strip() if data.started_on is not None: usecase.started_on = data.started_on - if data.completed_on is not None: + if data.completed_on is not None and data.completed_on is not strawberry.UNSET: usecase.completed_on = data.completed_on - if data.running_status is not None: - usecase.running_status = data.running_status.value - if data.logo is not None: + if ( + data.running_status is not None + and data.running_status is not strawberry.UNSET + ): + usecase.running_status = data.running_status + if data.logo is not None and data.logo is not strawberry.UNSET: usecase.logo = data.logo usecase.save() return TypeUseCase.from_django(usecase) From b078c2244ffadb70f22893ee1fb867c2b9712751 Mon Sep 17 00:00:00 2001 From: dc Date: Mon, 30 Jun 2025 13:50:06 +0530 Subject: [PATCH 35/38] fix running status enum --- api/schema/usecase_schema.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 938e38f8..4e896609 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -33,7 +33,11 @@ TypeUseCaseOrganizationRelationship, relationship_type, ) -from api.utils.enums import OrganizationRelationshipType, UseCaseStatus +from api.utils.enums import ( + OrganizationRelationshipType, + UseCaseRunningStatus, + UseCaseStatus, +) from api.utils.graphql_telemetry import trace_resolver from authorization.models import User from authorization.types import TypeUser @@ -60,7 +64,7 @@ class UpdateUseCaseMetadataInput: sectors: List[uuid.UUID] -use_case_running_status = strawberry.enum(UseCaseStatus) # type: 
ignore
+use_case_running_status = strawberry.enum(UseCaseRunningStatus)  # type: ignore


 @strawberry_django.partial(UseCase, fields="__all__", exclude=["datasets"])
@@ -69,7 +73,7 @@ class UseCaseInputPartial:

     id: str
     logo: Optional[Upload] = strawberry.field(default=None)
-    running_status: Optional[use_case_running_status] = UseCaseStatus.DRAFT
+    running_status: Optional[use_case_running_status] = UseCaseRunningStatus.INITIATED
     title: Optional[str] = None
     summary: Optional[str] = None
     platform_url: Optional[str] = None

From 9b79387ddfb7faf0f7a4d69a16928dd4a0479207 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 30 Jun 2025 14:16:06 +0530
Subject: [PATCH 36/38] add composite sources to separate metadata fields

---
 api/views/paginated_elastic_view.py | 2 +-
 api/views/search_dataset.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/api/views/paginated_elastic_view.py b/api/views/paginated_elastic_view.py
index a44a5564..93051758 100644
--- a/api/views/paginated_elastic_view.py
+++ b/api/views/paginated_elastic_view.py
@@ -91,7 +91,7 @@ def get(self, request: HttpRequest) -> Response:
             aggregations.pop("metadata")
             for agg in metadata_aggregations:
                 label: str = agg["key"]["metadata_label"]
-                value: str = agg["key"]["metadata_value"]
+                value: str = agg["key"].get("metadata_value", "")
                 if label not in aggregations:
                     aggregations[label] = {}
                 aggregations[label][value] = agg["doc_count"]

diff --git a/api/views/search_dataset.py b/api/views/search_dataset.py
index 9e8d269a..a1231c85 100644
--- a/api/views/search_dataset.py
+++ b/api/views/search_dataset.py
@@ -169,7 +169,12 @@ def add_aggregations(self, search: Search) -> Search:
         metadata_bucket = search.aggs.bucket("metadata", "nested", path="metadata")

         composite_sources = [
-            {"metadata": {"terms": {"field": "metadata.label.keyword"}}}
+            {
+                "metadata_label": {
+                    "terms": {"field": "metadata.metadata_item.label"}
+                }
+            },
+            {"metadata_value": {"terms": {"field": "metadata.value"}}},
         ]
         composite_agg = A(
             "composite",

From d395d30f14ad52ec2802a0ef6110e9de684a61c3 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 30 Jun 2025 16:48:08 +0530
Subject: [PATCH 37/38] add active sectors query

---
 api/schema/sector_schema.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/api/schema/sector_schema.py b/api/schema/sector_schema.py
index d89ab1d8..f32aee6a 100644
--- a/api/schema/sector_schema.py
+++ b/api/schema/sector_schema.py
@@ -9,6 +9,7 @@

 from api.models import Sector
 from api.types.type_sector import TypeSector
+from api.utils.enums import DatasetStatus


 @strawberry.input
@@ -40,6 +41,12 @@ def sector(self, info: Info, id: uuid.UUID) -> Optional[TypeSector]:
         except Sector.DoesNotExist:
             raise ValueError(f"Sector with ID {id} does not exist.")

+    @strawberry_django.field
+    def active_sectors(self, info: Info) -> list[TypeSector]:
+        """Get sectors with published datasets."""
+        queryset = Sector.objects.filter(datasets__status=DatasetStatus.PUBLISHED)
+        return TypeSector.from_django_list(queryset)
+

 @strawberry.type
 class Mutation:

From 9c595a94cc0ff699168fdc193c13201fb16be13d Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 30 Jun 2025 17:06:16 +0530
Subject: [PATCH 38/38] add order, pagination and filter to active sectors query

---
 api/schema/sector_schema.py | 40 +++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/api/schema/sector_schema.py b/api/schema/sector_schema.py
index f32aee6a..497063e4 100644
--- a/api/schema/sector_schema.py
+++ b/api/schema/sector_schema.py
@@ -1,14 +1,15 @@ 
import uuid -from typing import List, Optional +from typing import Any, List, Optional import strawberry import strawberry_django from strawberry import auto from strawberry.types import Info from strawberry_django.mutations import mutations +from strawberry_django.pagination import OffsetPaginationInput from api.models import Sector -from api.types.type_sector import TypeSector +from api.types.type_sector import SectorFilter, SectorOrder, TypeSector from api.utils.enums import DatasetStatus @@ -41,11 +42,38 @@ def sector(self, info: Info, id: uuid.UUID) -> Optional[TypeSector]: except Sector.DoesNotExist: raise ValueError(f"Sector with ID {id} does not exist.") - @strawberry_django.field - def active_sectors(self, info: Info) -> list[TypeSector]: + @strawberry_django.field( + filters=SectorFilter, + pagination=True, + order=SectorOrder, + ) + def active_sectors( + self, + info: Info, + filters: Optional[SectorFilter] = strawberry.UNSET, + pagination: Optional[OffsetPaginationInput] = strawberry.UNSET, + order: Optional[SectorOrder] = strawberry.UNSET, + ) -> list[TypeSector]: """Get sectors with published datasets.""" - queryset = Sector.objects.filter(datasets__status=DatasetStatus.PUBLISHED) - return TypeSector.from_django_list(queryset) + # Start with base queryset filtering for active sectors + queryset = Sector.objects.filter( + datasets__status=DatasetStatus.PUBLISHED + ).distinct() + + # Apply filters if provided + if filters is not strawberry.UNSET: + queryset = strawberry_django.filters.apply(filters, queryset, info) + + # Apply ordering if provided + if order is not strawberry.UNSET: + queryset = strawberry_django.ordering.apply(order, queryset, info) + + # Apply pagination if provided + if pagination is not strawberry.UNSET: + # Apply pagination to the list + queryset = strawberry_django.pagination.apply(pagination, queryset) + + return [TypeSector.from_django(instance) for instance in queryset] @strawberry.type
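# Example client query for active_sectors above -- a sketch only: it assumes
# strawberry's default camelCase exposure ("activeSectors") and that
# SectorFilter / SectorOrder expose a "name" field, neither of which is shown
# in this patch:
#
#   query {
#     activeSectors(
#       filters: { name: { iContains: "health" } }
#       pagination: { offset: 0, limit: 10 }
#       order: { name: ASC }
#     ) {
#       id
#       name
#     }
#   }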