From 57945e29bb43dc8fbef75c366e6301a017f6c387 Mon Sep 17 00:00:00 2001
From: dc
Date: Sat, 21 Jun 2025 17:15:31 +0530
Subject: [PATCH 01/38] check for request in kwargs as well

---
 api/activities/decorators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/activities/decorators.py b/api/activities/decorators.py
index 2d644417..ab7cbf7e 100644
--- a/api/activities/decorators.py
+++ b/api/activities/decorators.py
@@ -39,7 +39,7 @@ def decorator(func: F) -> F:
         def wrapper(*args: Any, **kwargs: Any) -> Any:
             # Extract request from args (typically the first or second argument in view functions)
             request = None
-            for arg in args:
+            for arg in list(args) + list(kwargs.values()):
                 if isinstance(arg, HttpRequest):
                     request = arg
                     break

From a3f8610c17c69b59b40dc62f1af5b82b1c054a9c Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:28:59 +0530
Subject: [PATCH 02/38] add all org query

---
 api/schema/organization_schema.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/api/schema/organization_schema.py b/api/schema/organization_schema.py
index 06d1c86f..9507c435 100644
--- a/api/schema/organization_schema.py
+++ b/api/schema/organization_schema.py
@@ -85,6 +85,15 @@ def organizations(

         return [TypeOrganization.from_django(org) for org in queryset]

+    @strawberry_django.field(permission_classes=[IsAuthenticated])
+    def all_organizations(self, info: Info) -> List[TypeOrganization]:
+        """Get all organizations."""
+        user = info.context.user
+        if not user or getattr(user, "is_anonymous", True):
+            logging.warning("Anonymous user or no user found in context")
+            return []
+        return [TypeOrganization.from_django(org) for org in Organization.objects.all()]
+
     @strawberry_django.field
     def organization(self, info: Info, id: str) -> Optional[TypeOrganization]:
         """Get organization by ID."""

From efd622fe03b9faa5fe3fe76b40593216268c90c6 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:29:34 +0530
Subject: [PATCH 03/38] add example env file

---
 .env.example | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..78d362b9
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,14 @@
+DB_ENGINE=django.db.backends.postgresql
+DB_NAME=postgres
+DB_USER=postgres
+DB_PASSWORD=postgres
+DB_HOST=backend_db
+DB_PORT=5432
+TELEMETRY_URL=http://otel-collector:4317
+ELASTICSEARCH_INDEX=http://elasticsearch:9200
+ELASTICSEARCH_USERNAME=elastic
+ELASTICSEARCH_PASS=changeme
+URL_WHITELIST=http://localhost:8000,http://localhost,http://localhost:3000
+DEBUG=True
+SECRET_KEY=your-secret-key
+REDIS_URL=redis://redis:6379/1

From f996c1be70c0acb172adbb59b3d9ccfe671ae3c5 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:30:00 +0530
Subject: [PATCH 04/38] use distinct when fetching org and usecase
 relationships

---
 api/types/type_organization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/types/type_organization.py b/api/types/type_organization.py
index be51d46b..3c83caf8 100644
--- a/api/types/type_organization.py
+++ b/api/types/type_organization.py
@@ -59,7 +59,7 @@ def published_use_cases_count(self, info: Info) -> int:
             use_cases = UseCase.objects.filter(
                 usecaseorganizationrelationship__organization_id=org_id,  # type: ignore
                 status=UseCaseStatus.PUBLISHED.value,
-            )
+            ).distinct()
             return use_cases.count()
         except Exception:
             return 0

From 2b63e873907f94fcd0d4e4ae11eff47222874b1f Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:53:03 +0530
Subject: [PATCH 05/38] update logging limits

---
 docker-compose.yml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 3fc51910..607d99b4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -22,6 +22,10 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   backend_db:
     image: "postgres:14.4"
@@ -42,6 +46,10 @@ services:
       timeout: 5s
       retries: 5
       start_period: 10s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2
@@ -74,6 +82,10 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   redis:
     image: "redis:alpine"
@@ -90,6 +102,10 @@ services:
       timeout: 5s
       retries: 5
       start_period: 10s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   telemetry_elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
@@ -116,6 +132,10 @@ services:
       test: curl -s http://localhost:9200/_cluster/health | grep -vq '"status":"red"'

+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   kibana:
     image: docker.elastic.co/kibana/kibana:7.16.2
@@ -171,6 +191,10 @@ services:
       test: curl --write-out 'HTTP %{http_code}' --fail --silent --output /dev/null http://localhost:8200/

+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

   otel-collector:
     image: otel/opentelemetry-collector:latest
@@ -184,6 +208,10 @@ services:
         condition: service_healthy
     ports:
       - 4317:4317
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"

 volumes:
   backend_db_data:

From f2a8acb1b644daf04e0284f91287fe4f4611bf6b Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 16:56:03 +0530
Subject: [PATCH 06/38] allow editing only draft datasets

---
 api/schema/dataset_schema.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index 372511ad..7a9f2ab8 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -564,6 +564,10 @@ def add_update_dataset_metadata(
             dataset = Dataset.objects.get(id=dataset_id)
         except Dataset.DoesNotExist as e:
             raise DjangoValidationError(f"Dataset with ID {dataset_id} does not exist.")
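+        # Metadata edits are rejected once a dataset leaves draft status.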
+        if dataset.status != DatasetStatus.DRAFT.value:
+            raise DjangoValidationError(
+                f"Dataset with ID {dataset_id} is not in draft status."
+            )

         if update_metadata_input.description:
             dataset.description = update_metadata_input.description

From 3e22e2c26479246588d909c8ce1757024f1f5dfd Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 17:08:07 +0530
Subject: [PATCH 07/38] return those usecases where org is owner as well

---
 api/schema/organization_data_schema.py | 8 +++++++-
 api/types/type_organization.py         | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/api/schema/organization_data_schema.py b/api/schema/organization_data_schema.py
index 7d5744fc..0620b23a 100644
--- a/api/schema/organization_data_schema.py
+++ b/api/schema/organization_data_schema.py
@@ -4,6 +4,7 @@

 import strawberry
 import strawberry_django
+from django.db.models import Q
 from strawberry.types import Info

 from api.models import Dataset, Organization, Sector, UseCase
@@ -58,7 +59,12 @@ def organization_published_use_cases(
         try:
             # Get published use cases for this organization
             queryset = UseCase.objects.filter(
-                usecaseorganizationrelationship__organization_id=organization_id,
+                (
+                    Q(organization__id=organization_id)
+                    | Q(
+                        usecaseorganizationrelationship__organization_id=organization_id
+                    )
+                ),
                 status=UseCaseStatus.PUBLISHED.value,
             ).distinct()
             return TypeUseCase.from_django_list(queryset)
diff --git a/api/types/type_organization.py b/api/types/type_organization.py
index 3c83caf8..d0c3e776 100644
--- a/api/types/type_organization.py
+++ b/api/types/type_organization.py
@@ -2,6 +2,7 @@

 import strawberry
 import strawberry_django
+from django.db.models import Q
 from strawberry import Info, auto

 from api.models import Organization
@@ -57,7 +58,7 @@ def published_use_cases_count(self, info: Info) -> int:
                 return 0

             use_cases = UseCase.objects.filter(
-                usecaseorganizationrelationship__organization_id=org_id,  # type: ignore
+                (Q(organization__id=org_id) | Q(usecaseorganizationrelationship__organization_id=org_id)),  # type: ignore
                 status=UseCaseStatus.PUBLISHED.value,
             ).distinct()
             return use_cases.count()

From 5ff4e6eed2e577ddc9ffb8683a546876a3b9b606 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 18:28:19 +0530
Subject: [PATCH 08/38] update publishers query to handle individual publishers

---
 api/schema/dataset_schema.py | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index 7a9f2ab8..47b9c874 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -18,6 +18,7 @@
     ResourceChartDetails,
     ResourceChartImage,
     Sector,
+    UseCase,
 )
 from api.models.Dataset import Tag
 from api.models.DatasetMetadata import DatasetMetadata
@@ -31,7 +32,12 @@
 from api.types.type_organization import TypeOrganization
 from api.types.type_resource_chart import TypeResourceChart
 from api.types.type_resource_chart_image import TypeResourceChartImage
-from api.utils.enums import DatasetAccessType, DatasetLicense, DatasetStatus
+from api.utils.enums import (
+    DatasetAccessType,
+    DatasetLicense,
+    DatasetStatus,
+    UseCaseStatus,
+)
 from api.utils.graphql_telemetry import trace_resolver
 from authorization.models import DatasetPermission, OrganizationMembership, Role, User
 from authorization.permissions import (
@@ -469,20 +475,30 @@ def get_publishers(self, info: Info) -> List[Union[TypeOrganization, TypeUser]]:
         published_datasets = Dataset.objects.filter(
             status=DatasetStatus.PUBLISHED.value
         )
+        published_ds_organizations = published_datasets.values_list(
+            "organization_id", flat=True
+        )
+        published_usecases = UseCase.objects.filter(
+            status=UseCaseStatus.PUBLISHED.value
+        )
+        published_uc_organizations = published_usecases.values_list(
+            "organization_id", flat=True
+        )
+        published_organizations = set(published_ds_organizations) | set(
+            published_uc_organizations
+        )

         # Get unique organizations that have published datasets
         org_publishers = Organization.objects.filter(
-            id__in=published_datasets.filter(organization__isnull=False).values_list(
-                "organization_id", flat=True
-            )
+            id__in=published_organizations
         ).distinct()

+        published_ds_users = published_datasets.values_list("user_id", flat=True)
+        published_uc_users = published_usecases.values_list("user_id", flat=True)
+        published_users = set(published_ds_users) | set(published_uc_users)
+
         # Get unique individual users who have published datasets without an organization
-        individual_publishers = User.objects.filter(
-            id__in=published_datasets.filter(organization__isnull=True).values_list(
-                "user_id", flat=True
-            )
-        ).distinct()
+        individual_publishers = User.objects.filter(id__in=published_users).distinct()

         # Convert to GraphQL types
         org_types = [TypeOrganization.from_django(org) for org in org_publishers]

From 601a7cd01c3f7137238d86919c02edee363d2fd4 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 18:42:02 +0530
Subject: [PATCH 09/38] add constraints for user role editing

---
 authorization/schema/mutation.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/authorization/schema/mutation.py b/authorization/schema/mutation.py
index 88103700..f6d7c2e9 100644
--- a/authorization/schema/mutation.py
+++ b/authorization/schema/mutation.py
@@ -111,15 +111,18 @@ def add_user_to_organization(
             organization = info.context.context.get("organization")
             role = Role.objects.get(id=input.role_id)

+            # If a user tries to change their own role, raise an error
+            if user.id == info.context.user.id:
+                raise ValueError("You cannot change your own role.")
+
             # Check if the membership already exists
             membership, created = OrganizationMembership.objects.get_or_create(
                 user=user, organization=organization, defaults={"role": role}
             )

-            # If the membership exists but the role is different, update it
-            if not created and membership.role != role:
-                membership.role = role
-                membership.save()
+            # If the membership exists, raise error
+            if not created:
+                raise ValueError("User is already a member of this organization.")

             return TypeOrganizationMembership.from_django(membership)
         except User.DoesNotExist:

From 7af8108bac800c58468df01a123c46ce92b5db37 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 18:50:36 +0530
Subject: [PATCH 10/38] use base mutation for assigning user role

---
 authorization/schema/mutation.py | 40 ++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/authorization/schema/mutation.py b/authorization/schema/mutation.py
index f6d7c2e9..52b0f73c 100644
--- a/authorization/schema/mutation.py
+++ b/authorization/schema/mutation.py
@@ -7,7 +7,12 @@
 import structlog
 from strawberry.types import Info

-from api.models import Dataset, Organization
+from api.models import Dataset
+from api.schema.base_mutation import (
+    BaseMutation,
+    DjangoValidationError,
+    MutationResponse,
+)
 from api.utils.graphql_telemetry import trace_resolver
 from authorization.models import OrganizationMembership, Role, User
 from authorization.permissions import IsAuthenticated
@@ -95,16 +100,23 @@ def update_user(self, info: Info, input: UpdateUserInput) -> TypeUser:

         return TypeUser.from_django(user)

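+    # BaseMutation.mutation (below) bundles permission checks, tracing, and activity tracking.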
-    @strawberry_django.mutation(
+    @strawberry.mutation
+    @BaseMutation.mutation(
         permission_classes=[IsAuthenticated, HasOrganizationAdminRole],
-    )
-    @trace_resolver(
-        name="add_user_to_organization",
-        attributes={"component": "user", "operation": "mutation"},
+        trace_name="add_user_to_organization",
+        trace_attributes={"component": "user", "operation": "mutation"},
+        track_activity={
+            "verb": "added",
+            "get_data": lambda result, **kwargs: {
+                "user_id": str(result.user.id),
+                "organization_id": str(result.organization.id),
+                "role_id": str(result.role.id),
+            },
+        },
     )
     def add_user_to_organization(
         self, info: Info, input: AddRemoveUserToOrganizationInput
-    ) -> TypeOrganizationMembership:
+    ) -> MutationResponse[TypeOrganizationMembership]:
         """Add a user to an organization with a specific role."""
         try:
             user = User.objects.get(id=input.user_id)
             organization = info.context.context.get("organization")
             role = Role.objects.get(id=input.role_id)

             # If a user tries to change their own role, raise an error
             if user.id == info.context.user.id:
-                raise ValueError("You cannot change your own role.")
+                raise DjangoValidationError("You cannot change your own role.")

             # Check if the membership already exists
             membership, created = OrganizationMembership.objects.get_or_create(
                 user=user, organization=organization, defaults={"role": role}
             )

             # If the membership exists, raise error
             if not created:
-                raise ValueError("User is already a member of this organization.")
+                raise DjangoValidationError(
+                    "User is already a member of this organization."
+                )

-            return TypeOrganizationMembership.from_django(membership)
+            return MutationResponse.success_response(
+                TypeOrganizationMembership.from_django(membership)
+            )
         except User.DoesNotExist:
-            raise ValueError(f"User with ID {input.user_id} does not exist.")
+            raise DjangoValidationError(f"User with ID {input.user_id} does not exist.")
         except Role.DoesNotExist:
-            raise ValueError(f"Role with ID {input.role_id} does not exist.")
+            raise DjangoValidationError(f"Role with ID {input.role_id} does not exist.")

     @strawberry.mutation
     def assign_organization_role(

From b341641789a6250b4e8e1325c5c278d65ac6ccac Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 19:09:11 +0530
Subject: [PATCH 11/38] add permission class to delete tag

---
 api/schema/tags_schema.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/api/schema/tags_schema.py b/api/schema/tags_schema.py
index 5401d821..ef45a465 100644
--- a/api/schema/tags_schema.py
+++ b/api/schema/tags_schema.py
@@ -12,7 +12,9 @@ class Mutation:
     """Mutations for tags."""

-    @strawberry_django.mutation(handle_django_errors=False)
+    @strawberry_django.mutation(
+        handle_django_errors=False, permission_classes=[IsAuthenticated]
+    )
     @trace_resolver(
         name="delete_tag", attributes={"component": "tag", "operation": "mutation"}
     )

From fec23c5fa3a50df36a9bcf3229350e7b55d0ea74 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 23 Jun 2025 19:13:49 +0530
Subject: [PATCH 12/38] use DjangoValidationError when deleting tags

---
 api/schema/tags_schema.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/api/schema/tags_schema.py b/api/schema/tags_schema.py
index ef45a465..5c549cbe 100644
--- a/api/schema/tags_schema.py
+++ b/api/schema/tags_schema.py
@@ -3,7 +3,11 @@
 from strawberry.types import Info

 from api.models import Tag
-from api.schema.base_mutation import BaseMutation, MutationResponse
+from api.schema.base_mutation import (
+    BaseMutation,
+    DjangoValidationError,
+    MutationResponse,
+)
 from api.utils.graphql_telemetry import trace_resolver
 from authorization.permissions import IsAuthenticated
@@ -38,6 +42,6 @@ def delete_tags(self, info: Info, tag_ids: list[str]) -> MutationResponse[bool]:
         try:
             tags = Tag.objects.filter(id__in=tag_ids)
         except Tag.DoesNotExist:
-            raise ValueError(f"Tags with IDs {tag_ids} do not exist.")
+            raise DjangoValidationError(f"Tags with IDs {tag_ids} do not exist.")
         tags.delete()
         return MutationResponse.success_response(True)

From 33333bc0c230afecfc68e0982d1ed53c1d67aaf5 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 12:20:14 +0530
Subject: [PATCH 13/38] add basic cloudformation templates

---
 .github/workflows/deploy-to-ecs.yml           | 138 ++++++++
 .pre-commit-config.yaml                       |  12 +
 .../dataspace-infrastructure.yml              | 335 ++++++++++++++++++
 aws/multi-service-architecture.md             | 249 +++++++++++++
 aws/otel-collector-task-definition.json       |  81 +++++
 aws/redis-task-definition.json                |  53 +++
 aws/task-definition.json                      |  61 ++++
 7 files changed, 929 insertions(+)
 create mode 100644 .github/workflows/deploy-to-ecs.yml
 create mode 100644 aws/cloudformation/dataspace-infrastructure.yml
 create mode 100644 aws/multi-service-architecture.md
 create mode 100644 aws/otel-collector-task-definition.json
 create mode 100644 aws/redis-task-definition.json
 create mode 100644 aws/task-definition.json

diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml
new file mode 100644
index 00000000..8becd02b
--- /dev/null
+++ b/.github/workflows/deploy-to-ecs.yml
@@ -0,0 +1,138 @@
+name: Deploy to Amazon ECS
+
+on:
+  push:
+    branches:
+      - dev
+  workflow_dispatch:
+
+env:
+  AWS_REGION: ${{ secrets.AWS_REGION }}
+  ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
+  ECS_CLUSTER: ${{ secrets.ECS_CLUSTER }}
+  ECS_EXECUTION_ROLE_ARN: ${{ secrets.ECS_EXECUTION_ROLE_ARN }}
+  APP_NAME: dataspace
+  APP_PORT: 8000
+  DB_ENGINE: django.db.backends.postgresql
+  DB_PORT: 5432
+  DEBUG_MODE: "False"
+  TELEMETRY_URL: http://otel-collector:4317
+  CPU_UNITS: 256
+  MEMORY_UNITS: 512
+  SSM_PATH_PREFIX: /dataspace
+  ENVIRONMENT: ${{ secrets.ENVIRONMENT || 'dev' }}
+
+jobs:
+  deploy-infrastructure:
+    name: Deploy Infrastructure
+    runs-on: ubuntu-latest
+    environment: development
+    if: github.event_name == 'workflow_dispatch' || contains(github.event.head_commit.modified, 'aws/cloudformation')
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Deploy CloudFormation stack
+        run: |
+          aws cloudformation deploy \
+            --template-file aws/cloudformation/dataspace-infrastructure.yml \
+            --stack-name dataspace-${{ env.ENVIRONMENT }}-infrastructure \
+            --parameter-overrides \
+              Environment=${{ env.ENVIRONMENT }} \
+              VpcId=${{ secrets.VPC_ID }} \
+              SubnetIds=${{ secrets.SUBNET_IDS }} \
+              DBUsername=${{ secrets.DB_USERNAME }} \
+              DBPassword=${{ secrets.DB_PASSWORD }} \
+              DBName=${{ secrets.DB_NAME }} \
+              ElasticsearchPassword=${{ secrets.ELASTICSEARCH_PASSWORD }} \
+              DjangoSecretKey=${{ secrets.DJANGO_SECRET_KEY }} \
+            --capabilities CAPABILITY_IAM \
+            --no-fail-on-empty-changeset
+
+  deploy-app:
+    name: Deploy Application
+    runs-on: ubuntu-latest
+    environment: development
+    needs: deploy-infrastructure
+    if: always() # Run even if infrastructure deployment is skipped
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v1
+
+      - name: Build, tag, and push image to Amazon ECR
+        id: build-image
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          IMAGE_TAG: ${{ github.sha }}
+        run: |
+          docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
+          docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
+          echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT
+
+      - name: Process main task definition template
+        id: task-def-app
+        env:
+          ECR_REPOSITORY_URI: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}
+          IMAGE_TAG: ${{ github.sha }}
+        run: |
+          envsubst < aws/task-definition.json > aws/task-definition-processed.json
+          cat aws/task-definition-processed.json
+
+      - name: Deploy main application ECS task definition
+        uses: aws-actions/amazon-ecs-deploy-task-definition@v1
+        with:
+          task-definition: aws/task-definition-processed.json
+          service: ${{ secrets.ECS_SERVICE }}
+          cluster: ${{ env.ECS_CLUSTER }}
+          wait-for-service-stability: true
+
+  deploy-otel:
+    name: Deploy OpenTelemetry Collector
+    runs-on: ubuntu-latest
+    environment: development
+    needs: deploy-app
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Process OpenTelemetry task definition template
+        id: task-def-otel
+        run: |
+          envsubst < aws/otel-collector-task-definition.json > aws/otel-collector-task-definition-processed.json
+          cat aws/otel-collector-task-definition-processed.json
+
+      - name: Deploy OpenTelemetry ECS task definition
+        uses: aws-actions/amazon-ecs-deploy-task-definition@v1
+        with:
+          task-definition: aws/otel-collector-task-definition-processed.json
+          service: ${{ secrets.ECS_OTEL_SERVICE }}
+          cluster: ${{ env.ECS_CLUSTER }}
+          wait-for-service-stability: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 186db564..25a71ade 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,6 +5,7 @@ repos:
     - id: trailing-whitespace
     - id: end-of-file-fixer
    - id: check-yaml
+      exclude: ^aws/cloudformation/.*\.yml$
    - id: check-added-large-files
    - id: debug-statements

@@ -20,6 +21,17 @@ repos:
    - id: isort
      args: ["--profile", "black"]

+- repo: local
+  hooks:
+  - id: cloudformation-validate
+    name: AWS CloudFormation Validation
+    description: Validates CloudFormation templates using AWS CLI
+    entry: bash -c 'aws cloudformation validate-template --template-body file://$0 || exit 1'
+    language: system
+    files: ^aws/cloudformation/.*\.yml$
+    require_serial: true
+    pass_filenames: true
+
 - repo: https://github.com/pre-commit/mirrors-mypy
   rev: v1.9.0
   hooks:
diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml
new file mode 100644
index 00000000..2bece03d
--- /dev/null
+++ b/aws/cloudformation/dataspace-infrastructure.yml
@@ -0,0 +1,335 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: 'DataSpace Application Infrastructure'
+
+Parameters:
+  Environment:
+    Description: Environment name (dev, staging, prod)
+    Type: String
+    Default: dev
+    AllowedValues:
+      - dev
+      - staging
+      - prod
+
+  VpcId:
+    Description: ID of the VPC
+    Type: AWS::EC2::VPC::Id
+
+  SubnetIds:
+    Description: List of subnet IDs for the application
+    Type: List<AWS::EC2::Subnet::Id>
+
+  DBUsername:
+    Description: Database username
+    Type: String
+    NoEcho: true
+
+  DBPassword:
+    Description: Database password
+    Type: String
+    NoEcho: true
+
+  DBName:
+    Description: Database name
+    Type: String
+    Default: dataspace
+
+  DBInstanceClass:
+    Description: Database instance class
+    Type: String
+    Default: db.t3.small
+
+  ElasticsearchInstanceType:
+    Description: Elasticsearch instance type
+    Type: String
+    Default: t3.small.elasticsearch
+
+  ElasticsearchPassword:
+    Description: Elasticsearch password
+    Type: String
+    NoEcho: true
+    Default: changeme
+
+  RedisNodeType:
+    Description: Redis node type
+    Type: String
+    Default: cache.t3.small
+
+  DjangoSecretKey:
+    Description: Django secret key
+    Type: String
+    NoEcho: true
+
+Resources:
+  # Security Groups
+  DatabaseSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for RDS database
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 5432
+          ToPort: 5432
+          SourceSecurityGroupId: !Ref ECSSecurityGroup
+
+  ECSSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for ECS tasks
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 8000
+          ToPort: 8000
+          CidrIp: 0.0.0.0/0
+
+  ElasticsearchSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for Elasticsearch
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 443
+          ToPort: 443
+          SourceSecurityGroupId: !Ref ECSSecurityGroup
+
+  RedisSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for Redis
+      VpcId: !Ref VpcId
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 6379
+          ToPort: 6379
+          SourceSecurityGroupId: !Ref ECSSecurityGroup
+
+  # Database
+  DatabaseSubnetGroup:
+    Type: AWS::RDS::DBSubnetGroup
+    Properties:
+      DBSubnetGroupDescription: Subnet group for DataSpace database
+      SubnetIds: !Ref SubnetIds
+
+  Database:
+    Type: AWS::RDS::DBInstance
+    Properties:
+      AllocatedStorage: 20
+      DBInstanceClass: !Ref DBInstanceClass
+      Engine: postgres
+      EngineVersion: '14.4'
+      MasterUsername: !Ref DBUsername
+      MasterUserPassword: !Ref DBPassword
+      DBName: !Ref DBName
+      VPCSecurityGroups:
+        - !GetAtt DatabaseSecurityGroup.GroupId
+      DBSubnetGroupName: !Ref DatabaseSubnetGroup
+      MultiAZ: false
+      StorageType: gp2
+      Tags:
+        - Key: Name
+          Value: !Sub dataspace-${Environment}-db
+    DeletionPolicy: Snapshot
+
+  # Elasticsearch Domain
+  ElasticsearchDomain:
+    Type: AWS::Elasticsearch::Domain
+    Properties:
+      DomainName: !Sub dataspace-${Environment}
+      ElasticsearchVersion: '7.10'
+      ElasticsearchClusterConfig:
+        InstanceType: !Ref ElasticsearchInstanceType
+        InstanceCount: 1
+      EBSOptions:
+        EBSEnabled: true
+        VolumeType: gp2
+        VolumeSize: 10
+      AccessPolicies:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
+            Action: 'es:*'
+            Resource: !Sub 'arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/dataspace-${Environment}/*'
+      AdvancedSecurityOptions:
+        Enabled: true
+        InternalUserDatabaseEnabled: true
+        MasterUserOptions:
+          MasterUserName: elastic
+          MasterUserPassword: !Ref ElasticsearchPassword
+      EncryptionAtRestOptions:
+        Enabled: true
+      NodeToNodeEncryptionOptions:
+        Enabled: true
+      DomainEndpointOptions:
+        EnforceHTTPS: true
+      VPCOptions:
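+        # Single-node domain: attach the ES security group and deploy into the first subnet only.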
+        SecurityGroupIds:
+          - !GetAtt ElasticsearchSecurityGroup.GroupId
+        SubnetIds:
+          - !Select [0, !Ref SubnetIds]
+    # Note: AWS::Elasticsearch::Domain does not support DeletionPolicy: Snapshot
+
+  # Redis Cache
+  RedisSubnetGroup:
+    Type: AWS::ElastiCache::SubnetGroup
+    Properties:
+      Description: Subnet group for DataSpace Redis
+      SubnetIds: !Ref SubnetIds
+
+  RedisCluster:
+    Type: AWS::ElastiCache::CacheCluster
+    Properties:
+      CacheNodeType: !Ref RedisNodeType
+      Engine: redis
+      NumCacheNodes: 1
+      VpcSecurityGroupIds:
+        - !GetAtt RedisSecurityGroup.GroupId
+      CacheSubnetGroupName: !Ref RedisSubnetGroup
+      Tags:
+        - Key: Name
+          Value: !Sub dataspace-${Environment}-redis
+
+  # ECS Cluster
+  ECSCluster:
+    Type: AWS::ECS::Cluster
+    Properties:
+      ClusterName: !Sub dataspace-${Environment}-cluster
+      CapacityProviders:
+        - FARGATE
+        - FARGATE_SPOT
+      DefaultCapacityProviderStrategy:
+        - CapacityProvider: FARGATE
+          Weight: 1
+      Tags:
+        - Key: Name
+          Value: !Sub dataspace-${Environment}-cluster
+
+  # IAM Roles
+  ECSTaskExecutionRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: ecs-tasks.amazonaws.com
+            Action: sts:AssumeRole
+      ManagedPolicyArns:
+        - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
+      Policies:
+        - PolicyName: SSMParameterAccess
+          PolicyDocument:
+            Version: '2012-10-17'
+            Statement:
+              - Effect: Allow
+                Action:
+                  - ssm:GetParameters
+                  - ssm:GetParameter
+                Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/dataspace/*'
+
+  # SSM Parameters
+  DBHostParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_HOST
+      Type: String
+      Value: !GetAtt Database.Endpoint.Address
+      Description: Database host
+
+  DBNameParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_NAME
+      Type: String
+      Value: !Ref DBName
+      Description: Database name
+
+  DBUserParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_USER
+      Type: String
+      Value: !Ref DBUsername
+      Description: Database username
+
+  DBPasswordParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/DB_PASSWORD
+      Type: SecureString
+      Value: !Ref DBPassword
+      Description: Database password
+
+  ElasticsearchIndexParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/ELASTICSEARCH_INDEX
+      Type: String
+      Value: !GetAtt ElasticsearchDomain.DomainEndpoint
+      Description: Elasticsearch endpoint
+
+  ElasticsearchUsernameParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/ELASTICSEARCH_USERNAME
+      Type: String
+      Value: elastic
+      Description: Elasticsearch username
+
+  ElasticsearchPasswordParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/ELASTICSEARCH_PASS
+      Type: SecureString
+      Value: !Ref ElasticsearchPassword
+      Description: Elasticsearch password
+
+  RedisHostParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/REDIS_HOST
+      Type: String
+      Value: !GetAtt RedisCluster.RedisEndpoint.Address
+      Description: Redis host
+
+  SecretKeyParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/SECRET_KEY
+      Type: SecureString
+      Value: !Ref DjangoSecretKey
+      Description: Django secret key
+
+  URLWhitelistParameter:
+    Type: AWS::SSM::Parameter
+    Properties:
+      Name: /dataspace/URL_WHITELIST
+      Type: String
+      Value: !Sub 'https://dataspace-${Environment}.yourdomain.com'
+      Description: URL whitelist
+
+Outputs:
+  ClusterName:
+    Description: ECS Cluster Name
+    Value: !Ref ECSCluster
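+  # The remaining outputs surface the managed-service endpoints for operators and tooling.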
+
+  DatabaseEndpoint:
+    Description: Database endpoint
+    Value: !GetAtt Database.Endpoint.Address
+
+  ElasticsearchEndpoint:
+    Description: Elasticsearch endpoint
+    Value: !GetAtt ElasticsearchDomain.DomainEndpoint
+
+  RedisEndpoint:
+    Description: Redis endpoint
+    Value: !GetAtt RedisCluster.RedisEndpoint.Address
+
+  TaskExecutionRoleArn:
+    Description: ECS Task Execution Role ARN
+    Value: !GetAtt ECSTaskExecutionRole.Arn
diff --git a/aws/multi-service-architecture.md b/aws/multi-service-architecture.md
new file mode 100644
index 00000000..6bfc34cb
--- /dev/null
+++ b/aws/multi-service-architecture.md
@@ -0,0 +1,249 @@
+# DataSpace Multi-Service Architecture for AWS ECS
+
+This document outlines the architecture for deploying the DataSpace application and its dependent services to AWS ECS.
+
+## Architecture Overview
+
+The DataSpace application consists of several services that work together:
+
+1. **Backend Application (dataspace)** - The main Django/Python application
+2. **PostgreSQL Database (backend_db)** - Database for the application
+3. **Elasticsearch** - For search functionality
+4. **Redis** - For caching and possibly message queuing
+5. **Telemetry Services** - Including OpenTelemetry Collector
+
+## AWS Services Mapping
+
+For production deployment on AWS, we use the following mapping:
+
+| Local Service | AWS Service | Justification |
+|---------------|-------------|---------------|
+| dataspace (backend) | ECS Fargate | Containerized application, managed by ECS |
+| backend_db | Amazon RDS for PostgreSQL | Managed database service with backups, high availability |
+| elasticsearch | Amazon Elasticsearch Service | Managed Elasticsearch with scaling and security |
+| redis | Amazon ElastiCache for Redis | Managed Redis with high availability |
+| otel-collector | ECS Fargate (separate task) | Deployed as a separate container service |
+
+## Deployment Architecture
+
+### Infrastructure as Code
+
+All AWS resources are provisioned using CloudFormation templates located in `aws/cloudformation/`. The main template `dataspace-infrastructure.yml` creates:
+
+1. **Security Groups** - For RDS, Elasticsearch, Redis, and ECS services
+2. **Amazon RDS PostgreSQL** - Managed database with subnet group
+3. **Amazon Elasticsearch Service** - Managed Elasticsearch domain with security and access policies
+4. **Amazon ElastiCache Redis** - Managed Redis cluster
+5. **ECS Cluster** - With Fargate and Fargate Spot capacity providers
+6. **IAM Roles** - For ECS task execution with appropriate permissions
+7. **SSM Parameters** - For storing sensitive connection information
+
+### ECS Task Definitions
+
+The application is deployed using two main ECS task definitions:
+
+1. **Main Application (`aws/task-definition.json`)** - Deploys the Django application container with:
+   - Environment variables for configuration
+   - Secrets from SSM Parameter Store for sensitive data
+   - Health checks and logging configuration
+   - Network configuration for service discovery
+
+2. **OpenTelemetry Collector (`aws/otel-collector-task-definition.json`)** - Deploys the telemetry collector with:
+   - Port mappings for various telemetry protocols
+   - Volume mounts for configuration
+   - Health checks and logging
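+
+As a rough manual equivalent of what the CI pipeline does (a sketch — the pipeline itself registers the definition through the `amazon-ecs-deploy-task-definition` action, and this assumes the template variables are exported in the shell):
+
+```bash
+# Render the template by substituting ${VAR} placeholders, then register it by hand
+envsubst < aws/task-definition.json > /tmp/task-definition.json
+aws ecs register-task-definition --cli-input-json file:///tmp/task-definition.json
+```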
+
+### Managed Services Integration
+
+#### Amazon RDS PostgreSQL
+
+The PostgreSQL database is provisioned as a managed RDS instance with:
+
+- Automated backups
+- Security group restrictions (only accessible from ECS tasks)
+- Credentials stored in SSM Parameter Store
+- Connection information injected into the application container as environment variables
+
+#### Amazon Elasticsearch Service
+
+Elasticsearch is provisioned as a managed service with:
+
+- Fine-grained access control
+- HTTPS encryption
+- Security group restrictions
+- Connection information stored in SSM Parameter Store
+
+#### Amazon ElastiCache Redis
+
+Redis is provisioned as a managed ElastiCache cluster with:
+
+- Security group restrictions
+- Connection information stored in SSM Parameter Store
+- Host and port injected into the application container
+
+## CI/CD Pipeline
+
+The deployment is automated using GitHub Actions workflow (`.github/workflows/deploy-to-ecs.yml`) that:
+
+1. **Triggers** on pushes to the `dev` branch or manual workflow dispatch
+2. **Deploys Infrastructure** using CloudFormation (conditionally based on changes)
+3. **Builds and Pushes** Docker images to Amazon ECR
+4. **Deploys Application** using ECS task definitions with environment variable substitution
+5. **Deploys OpenTelemetry Collector** as a separate ECS service
+
+### Idempotent Infrastructure Creation
+
+The CloudFormation template is designed to be idempotent by:
+
+1. Using the `--no-fail-on-empty-changeset` flag in CloudFormation deployment
+2. Setting appropriate `DeletionPolicy` and `UpdateReplacePolicy` attributes on resources
+3. Using conditional resource creation based on environment parameters
+
+This ensures that:
+- If resources already exist, they won't be recreated unnecessarily
+- Database and Elasticsearch data is preserved during updates
+- Application code can be updated independently of infrastructure
+
+## Environment Variables and Secrets Management
+
+The deployment uses a three-tier approach to configuration:
+
+1. **GitHub Repository Secrets** - For AWS credentials and sensitive parameters
+2. **Environment Variables** - For non-sensitive configuration in CI/CD and ECS tasks
+3. **AWS SSM Parameter Store** - For service connection information and secrets
+
+### Required GitHub Secrets
+
+- `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` - AWS credentials
+- `AWS_REGION` - Target AWS region
+- `ECR_REPOSITORY` - ECR repository name
+- `ECS_CLUSTER` - ECS cluster name
+- `ECS_SERVICE` - Main application ECS service name
+- `ECS_OTEL_SERVICE` - OpenTelemetry collector ECS service name
+- `ECS_EXECUTION_ROLE_ARN` - ECS task execution role ARN
+- `VPC_ID` and `SUBNET_IDS` - VPC and subnet IDs
+- `DB_USERNAME`, `DB_PASSWORD`, `DB_NAME` - Database credentials
+- `ELASTICSEARCH_PASSWORD` - Elasticsearch password
+- `DJANGO_SECRET_KEY` - Django secret key
+- `ENVIRONMENT` - Deployment environment (dev, staging, prod)
+
+## Scaling and High Availability
+
+The architecture supports scaling and high availability through:
+
+1. **ECS Fargate** - Automatic scaling based on CPU/memory usage
+2. **RDS Multi-AZ** - Optional database high availability
+3. **ElastiCache Replication** - Optional Redis replication
+4. **Elasticsearch Multi-Node** - Optional Elasticsearch cluster scaling
+
+## Monitoring and Observability
+
+The deployment includes observability through:
+
+1. **CloudWatch Logs** - For all ECS services
+2. **OpenTelemetry Collector** - For metrics, traces, and logs collection
+3. **Health Checks** - For all services to ensure availability
+
+## Security Considerations
+
+The deployment implements security best practices:
+
+1. **IAM Least Privilege** - Task execution role with minimal permissions
+2. **Security Groups** - Restrict access between services
+3. **Secrets Management** - Sensitive data in SSM Parameter Store
+4. **Network Isolation** - Services in private subnets where appropriate
+5. **HTTPS** - For all external communication
+
+## Deployment Options
+
+### Option 1: Managed Services + ECS (Recommended)
+
+This approach uses AWS managed services where possible and ECS only for custom application containers:
+
+- **Backend Application**: ECS Fargate Task/Service
+- **Database**: Amazon RDS
+- **Elasticsearch**: Amazon Elasticsearch Service
+- **Redis**: Amazon ElastiCache
+- **Telemetry**: Amazon Elasticsearch Service + ECS for collectors/agents
+
+### Option 2: ECS for Everything
+
+This approach deploys everything as containers in ECS:
+
+- **Backend Application**: ECS Fargate Task/Service
+- **Database**: ECS Fargate Task with PostgreSQL container + EBS volume
+- **Elasticsearch**: ECS Fargate Task with Elasticsearch container + EBS volume
+- **Redis**: ECS Fargate Task with Redis container
+- **Telemetry**: ECS Fargate Tasks for all telemetry services
+
+### Recommended Approach
+
+We recommend **Option 1** for production workloads because:
+
+1. Managed services handle backups, high availability, and security patches
+2. Reduced operational overhead
+3. Better scalability and reliability
+4. Separation of concerns
+
+## Implementation Plan
+
+### 1. Create AWS Managed Services
+
+First, create the necessary managed services:
+
+- **RDS PostgreSQL Instance**
+- **ElastiCache Redis Cluster**
+- **Amazon Elasticsearch Service Domain(s)**
+
+### 2. Update Task Definition for Backend Application
+
+The task definition we've already created focuses on the backend application. It needs to be updated with connection information for the managed services.
+
+### 3. Create Task Definitions for Custom Services
+
+For services that don't have AWS managed equivalents (like otel-collector), create separate task definitions.
+
+### 4. Update CI/CD Pipeline
+
+Update the GitHub Actions workflow to:
+
+1. Deploy infrastructure changes if needed (using Terraform or CloudFormation)
+2. Deploy application containers to ECS
+
+## Example: RDS Configuration
+
+```bash
+# Create RDS instance
+aws rds create-db-instance \
+  --db-instance-identifier dataspace-db \
+  --db-instance-class db.t3.small \
+  --engine postgres \
+  --master-username ${DB_USERNAME} \
+  --master-user-password ${DB_PASSWORD} \
+  --allocated-storage 20
+```
+
+## Example: ElastiCache Configuration
+
+```bash
+# Create ElastiCache cluster
+aws elasticache create-cache-cluster \
+  --cache-cluster-id dataspace-redis \
+  --engine redis \
+  --cache-node-type cache.t3.small \
+  --num-cache-nodes 1
+```
+
+## Example: Amazon Elasticsearch Service
+
+```bash
+# Create Elasticsearch domain
+aws es create-elasticsearch-domain \
+  --domain-name dataspace-search \
+  --elasticsearch-version 7.10 \
+  --elasticsearch-cluster-config InstanceType=t3.small.elasticsearch,InstanceCount=1 \
+  --ebs-options EBSEnabled=true,VolumeType=gp2,VolumeSize=10
+```
+
+## Next Steps
+
+1. Create CloudFormation or Terraform templates for the infrastructure
+2. Update the ECS task definition with connection information for managed services
+3. Create separate task definitions for services that need to run in ECS
+4. Update the CI/CD pipeline to deploy all components
diff --git a/aws/otel-collector-task-definition.json b/aws/otel-collector-task-definition.json
new file mode 100644
index 00000000..cc5e1d37
--- /dev/null
+++ b/aws/otel-collector-task-definition.json
@@ -0,0 +1,81 @@
+{
+  "family": "${APP_NAME}-otel-collector",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}-otel-collector",
+      "image": "otel/opentelemetry-collector:latest",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 4317,
+          "hostPort": 4317,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 4318,
+          "hostPort": 4318,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 55680,
+          "hostPort": 55680,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 55681,
+          "hostPort": 55681,
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "ENVIRONMENT", "value": "${ENVIRONMENT}" }
+      ],
+      "mountPoints": [
+        {
+          "sourceVolume": "otel-config",
+          "containerPath": "/etc/otel-collector-config.yml",
+          "readOnly": true
+        }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}-otel-collector",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:13133/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "volumes": [
+    {
+      "name": "otel-config",
+      "dockerVolumeConfiguration": {
+        "scope": "task",
+        "driver": "local",
+        "labels": {
+          "app": "${APP_NAME}",
+          "component": "otel-collector"
+        }
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "256",
+  "memory": "512",
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "${APP_NAME}" },
+    { "key": "Component", "value": "otel-collector" },
+    { "key": "ManagedBy", "value": "GitHub-Actions" }
+  ]
+}
diff --git a/aws/redis-task-definition.json b/aws/redis-task-definition.json
new file mode 100644
index 00000000..2a823bd3
--- /dev/null
+++ b/aws/redis-task-definition.json
@@ -0,0 +1,53 @@
+{
+  "family": "${APP_NAME}-redis",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}-redis",
+      "image": "redis:alpine",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 6379,
+          "hostPort": 6379,
+          "protocol": "tcp"
+        }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}-redis",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "redis-cli ping | grep -q 'PONG'"],
+        "interval": 10,
+        "timeout": 5,
+        "retries": 5,
+        "startPeriod": 10
+      },
+      "mountPoints": [
+        {
+          "sourceVolume": "redis-data",
+          "containerPath": "/data"
+        }
+      ]
+    }
+  ],
+  "volumes": [
+    {
+      "name": "redis-data",
+      "efsVolumeConfiguration": {
+        "fileSystemId": "${EFS_ID}",
+        "rootDirectory": "/redis-data",
+        "transitEncryption": "ENABLED"
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "512",
+  "memory": "1024"
+}
diff --git a/aws/task-definition.json b/aws/task-definition.json
new file mode 100644
index 00000000..e8b35399
--- /dev/null
+++ b/aws/task-definition.json
@@ -0,0 +1,61 @@
+{
+  "family": "${APP_NAME}",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}",
+      "image": "${ECR_REPOSITORY_URI}:${IMAGE_TAG}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": "${APP_PORT}",
+          "hostPort": "${APP_PORT}",
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "DB_ENGINE", "value": "${DB_ENGINE}" },
+        { "name": "DB_PORT", "value": "${DB_PORT}" },
+        { "name": "DEBUG", "value": "${DEBUG_MODE}" },
+        { "name": "TELEMETRY_URL", "value": "${TELEMETRY_URL}" },
+        { "name": "REDIS_PORT", "value": "6379" }
+      ],
+      "secrets": [
+        { "name": "DB_HOST", "valueFrom": "${SSM_PATH_PREFIX}/DB_HOST" },
+        { "name": "DB_NAME", "valueFrom": "${SSM_PATH_PREFIX}/DB_NAME" },
+        { "name": "DB_USER", "valueFrom": "${SSM_PATH_PREFIX}/DB_USER" },
+        { "name": "DB_PASSWORD", "valueFrom": "${SSM_PATH_PREFIX}/DB_PASSWORD" },
+        { "name": "SECRET_KEY", "valueFrom": "${SSM_PATH_PREFIX}/SECRET_KEY" },
+        { "name": "ELASTICSEARCH_INDEX", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_INDEX" },
+        { "name": "ELASTICSEARCH_USERNAME", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_USERNAME" },
+        { "name": "ELASTICSEARCH_PASS", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_PASS" },
+        { "name": "URL_WHITELIST", "valueFrom": "${SSM_PATH_PREFIX}/URL_WHITELIST" },
+        { "name": "REDIS_HOST", "valueFrom": "${SSM_PATH_PREFIX}/REDIS_HOST" }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${CPU_UNITS}",
+  "memory": "${MEMORY_UNITS}",
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "${APP_NAME}" },
+    { "key": "ManagedBy", "value": "GitHub-Actions" }
+  ]
+}

From bb1e96bcaa5e8763d6e2f8873dd5c07a5d390ae8 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 13:49:03 +0530
Subject: [PATCH 14/38] always retain existing description

---
 api/utils/data_indexing.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/api/utils/data_indexing.py b/api/utils/data_indexing.py
index 3f2af763..679a2c86 100644
--- a/api/utils/data_indexing.py
+++ b/api/utils/data_indexing.py
@@ -163,12 +163,7 @@ def index_resource_data(resource: Resource) -> Optional[ResourceDataTable]:
                 if col in existing_schemas:
                     existing_description = existing_schemas[col]["description"]
                     # Check for None and non-auto-generated descriptions
-                    if (
-                        existing_description is not None
-                        and not existing_description.startswith(
-                            "Description of column"
-                        )
-                    ):
+                    if existing_description is not None:
                         description = existing_description
                         logger.info(
                             f"Preserved custom description for column {col}"

From 2b5fb9e60d43d2d6281a6eea3adc41d35cf194d7 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 14:01:35 +0530
Subject: [PATCH 15/38] handle updating resource schema when updating file

---
 api/schema/resource_schema.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/api/schema/resource_schema.py b/api/schema/resource_schema.py
index 194842ef..b502ace2 100644
--- a/api/schema/resource_schema.py
+++ b/api/schema/resource_schema.py
@@ -159,6 +159,11 @@ def _create_file_resource_schema(resource: Resource) -> None:
     return


+def _reset_file_resource_schema(resource: Resource) -> None:
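+    # Drop any stale schema rows, then rebuild them by re-indexing the file.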
+    ResourceSchema.objects.filter(resource=resource).delete()
+    data_table = index_resource_data(resource)
+
+
 def _update_file_resource_schema(
     resource: Resource, updated_schema: List[SchemaUpdate]
 ) -> None:
@@ -262,6 +267,7 @@ def create_file_resources(
                 file=file, size=file.size, resource=resource
             )
             _validate_file_details_and_update_format(resource)
+            _create_file_resource_schema(resource)
             resources.append(TypeResource.from_django(resource))

         return resources
@@ -351,6 +357,8 @@ def update_file_resource(
                 size=file_resource_input.file.size,
                 resource=resource,
             )
+            _validate_file_details_and_update_format(resource)
+            _create_file_resource_schema(resource)

         if file_resource_input.preview_details:
             _update_resource_preview_details(file_resource_input, resource)

From 37c6006cc945e3495ffadb3e675321d382a50803 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 17:04:57 +0530
Subject: [PATCH 16/38] config changes to infra

---
 aws/cloudformation/dataspace-infrastructure.yml | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml
index 2bece03d..70e8b9b2 100644
--- a/aws/cloudformation/dataspace-infrastructure.yml
+++ b/aws/cloudformation/dataspace-infrastructure.yml
@@ -116,10 +116,10 @@ Resources:
   Database:
     Type: AWS::RDS::DBInstance
     Properties:
-      AllocatedStorage: 20
+      AllocatedStorage: 30
       DBInstanceClass: !Ref DBInstanceClass
       Engine: postgres
-      EngineVersion: '14.4'
+      EngineVersion: '17.4'
       MasterUsername: !Ref DBUsername
       MasterUserPassword: !Ref DBPassword
       DBName: !Ref DBName
@@ -261,9 +261,10 @@ Resources:
     Type: AWS::SSM::Parameter
     Properties:
       Name: /dataspace/DB_PASSWORD
-      Type: SecureString
+      Type: String
       Value: !Ref DBPassword
       Description: Database password
+      Tier: Standard

   ElasticsearchIndexParameter:
     Type: AWS::SSM::Parameter
@@ -285,9 +286,10 @@ Resources:
     Type: AWS::SSM::Parameter
     Properties:
       Name: /dataspace/ELASTICSEARCH_PASS
-      Type: SecureString
+      Type: String
       Value: !Ref ElasticsearchPassword
       Description: Elasticsearch password
+      Tier: Standard

   RedisHostParameter:
     Type: AWS::SSM::Parameter
@@ -301,9 +303,10 @@ Resources:
     Type: AWS::SSM::Parameter
     Properties:
       Name: /dataspace/SECRET_KEY
-      Type: SecureString
+      Type: String
       Value: !Ref DjangoSecretKey
       Description: Django secret key
+      Tier: Standard

   URLWhitelistParameter:
     Type: AWS::SSM::Parameter

From 674948cbf200bddcfec7fcad39e63ba2d4953138 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 17:05:21 +0530
Subject: [PATCH 17/38] add templates for otel and main ecs services

---
 ...el-collector-task-definition.json.template | 67 +++++++++++++++++++
 aws/task-definition.json.template             | 62 +++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 aws/otel-collector-task-definition.json.template
 create mode 100644 aws/task-definition.json.template

diff --git a/aws/otel-collector-task-definition.json.template b/aws/otel-collector-task-definition.json.template
new file mode 100644
index 00000000..966cdee4
--- /dev/null
+++ b/aws/otel-collector-task-definition.json.template
@@ -0,0 +1,67 @@
+{
+  "family": "dataspace-otel-collector",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${OTEL_CPU_UNITS}",
+  "memory": "${OTEL_MEMORY_UNITS}",
+  "containerDefinitions": [
+    {
+      "name": "otel-collector",
+      "image": "otel/opentelemetry-collector:${OTEL_VERSION}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 4317,
+          "hostPort": 4317,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 4318,
+          "hostPort": 4318,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 8888,
+          "hostPort": 8888,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 8889,
+          "hostPort": 8889,
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "OTEL_RESOURCE_ATTRIBUTES", "value": "service.name=dataspace-telemetry,deployment.environment=${ENVIRONMENT}" },
+        { "name": "OTEL_CONFIG", "value": "receivers:\n  otlp:\n    protocols:\n      grpc:\n      http:\n  prometheus:\n    config:\n      scrape_configs:\n        - job_name: 'otel-collector'\n          scrape_interval: 10s\n          static_configs:\n            - targets: ['0.0.0.0:8888']\nexporters:\n  logging:\n    verbosity: detailed\n  prometheus:\n    endpoint: 0.0.0.0:8889\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      exporters: [logging]\n    metrics:\n      receivers: [otlp, prometheus]\n      exporters: [prometheus, logging]" },
+        { "name": "OTEL_CONFIG_PATH", "value": "/etc/otel/config.yaml" }
+      ],
+      "command": [
+        "--config=env:OTEL_CONFIG"
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/dataspace-otel",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs",
+          "awslogs-create-group": "true"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:8888/health || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "volumes": [],
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "dataspace-telemetry" },
+    { "key": "ManagedBy", "value": "CloudFormation" }
+  ]
+}
diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template
new file mode 100644
index 00000000..fdd73659
--- /dev/null
+++ b/aws/task-definition.json.template
@@ -0,0 +1,62 @@
+{
+  "family": "dataspace",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${CPU_UNITS}",
+  "memory": "${MEMORY_UNITS}",
+  "containerDefinitions": [
+    {
+      "name": "dataspace",
+      "image": "${ECR_REPOSITORY}:${IMAGE_TAG}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": ${APP_PORT},
+          "hostPort": ${APP_PORT},
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "DEBUG", "value": "${DEBUG_MODE}" },
+        { "name": "APP_PORT", "value": "${APP_PORT}" },
+        { "name": "DB_ENGINE", "value": "${DB_ENGINE}" },
+        { "name": "DB_PORT", "value": "${DB_PORT}" },
+        { "name": "TELEMETRY_URL", "value": "${TELEMETRY_URL}" }
+      ],
+      "secrets": [
+        { "name": "DB_HOST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_HOST" },
+        { "name": "DB_NAME", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_NAME" },
+        { "name": "DB_USER", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_USER" },
+        { "name": "DB_PASSWORD", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_PASSWORD" },
+        { "name": "ELASTICSEARCH_INDEX", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_INDEX" },
+        { "name": "ELASTICSEARCH_USERNAME", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_USERNAME" },
+        { "name": "ELASTICSEARCH_PASS", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_PASS" },
+        { "name": "REDIS_HOST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/REDIS_HOST" },
+        { "name": "SECRET_KEY", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/SECRET_KEY" },
+        { "name": "URL_WHITELIST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/URL_WHITELIST" }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/dataspace",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs",
+          "awslogs-create-group": "true"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "dataspace" },
+    { "key": "ManagedBy", "value": "CloudFormation" }
+  ]
+}

From 87188f2e331cc89e3ab1edb5213eba670c5d0e2e Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 17:05:49 +0530
Subject: [PATCH 18/38] ignore aws env files

---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 434ee87c..707db789 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,3 +155,7 @@ resources/
 .env
 api/migrations/*
 authorization/migrations/*
+
+
+# AWS files
+aws/.env.*

From ac69774e33de13604f7909e2b292fa68afac2207 Mon Sep 17 00:00:00 2001
From: dc
Date: Wed, 25 Jun 2025 18:40:25 +0530
Subject: [PATCH 19/38] use existing task definition to deploy

---
 .github/workflows/deploy-to-ecs.yml | 35 ++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml
index 8becd02b..64424b94 100644
--- a/.github/workflows/deploy-to-ecs.yml
+++ b/.github/workflows/deploy-to-ecs.yml
@@ -89,19 +89,26 @@ jobs:
           docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
           echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT

-      - name: Process main task definition template
-        id: task-def-app
-        env:
-          ECR_REPOSITORY_URI: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}
-          IMAGE_TAG: ${{ github.sha }}
-        run: |
-          envsubst < aws/task-definition.json > aws/task-definition-processed.json
-          cat aws/task-definition-processed.json
+      - name: Download current task definition
+        id: download-taskdef
+        run: |
+          aws ecs describe-task-definition \
+            --task-definition dataspace \
+            --query taskDefinition > aws/current-task-definition.json
+          cat aws/current-task-definition.json
+
aws/otel-collector-task-definition-processed.json
-          cat aws/otel-collector-task-definition-processed.json
+          aws ecs describe-task-definition \
+            --task-definition dataspace-otel-collector \
+            --query taskDefinition > aws/current-otel-task-definition.json
+          cat aws/current-otel-task-definition.json

       - name: Deploy OpenTelemetry ECS task definition
         uses: aws-actions/amazon-ecs-deploy-task-definition@v1
         with:
-          task-definition: aws/otel-collector-task-definition-processed.json
+          task-definition: aws/current-otel-task-definition.json
           service: ${{ secrets.ECS_OTEL_SERVICE }}
           cluster: ${{ env.ECS_CLUSTER }}
           wait-for-service-stability: true

From ccaae5a4971ca80620c62ce555d484740b55cd9e Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 15:13:07 +0530
Subject: [PATCH 20/38] add efs to task definition and fix dockerfile to have an execution command

---
 .github/workflows/deploy-to-ecs.yml | 12 ++---
 Dockerfile | 14 ++++--
 .../dataspace-infrastructure.yml | 45 ++++++++++++++++++-
 aws/task-definition.json.template | 23 +++++++++-
 4 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml
index 64424b94..4fe14158 100644
--- a/.github/workflows/deploy-to-ecs.yml
+++ b/.github/workflows/deploy-to-ecs.yml
@@ -89,13 +89,13 @@ jobs:
           docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
           echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT

-      - name: Download current task definition
-        id: download-taskdef
+      - name: Download task definition and get EFS ID
         run: |
-          aws ecs describe-task-definition \
-            --task-definition dataspace \
-            --query taskDefinition > aws/current-task-definition.json
-          cat aws/current-task-definition.json
+          aws ecs describe-task-definition --task-definition dataspace --query taskDefinition > aws/current-task-definition.json
+          aws ecs describe-task-definition --task-definition dataspace-otel-collector --query taskDefinition > aws/current-otel-task-definition.json
+          # Get the EFS ID from CloudFormation export
+          EFS_ID=$(aws cloudformation list-exports --query "Exports[?Name=='dataspace-${{ env.ENVIRONMENT }}-MigrationsFileSystemId'].Value" --output text)
+          echo "EFS_ID=$EFS_ID" >> $GITHUB_ENV

       - name: Update container image only
         id: task-def-app

diff --git a/Dockerfile b/Dockerfile
index a16a322a..30967ee2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,10 +12,18 @@ RUN echo 'deb http://archive.debian.org/debian stretch main contrib non-free' >>
 WORKDIR /code
 COPY . 
/code/ -RUN pip install psycopg2-binary +RUN pip install psycopg2-binary uvicorn RUN pip install -r requirements.txt -#RUN python manage.py migrate + +# Create healthcheck script +RUN echo '#!/bin/bash\nset -e\npython -c "import sys; import django; django.setup(); sys.exit(0)"' > /code/healthcheck.sh \ + && chmod +x /code/healthcheck.sh EXPOSE 8000 -#CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"] \ No newline at end of file + +# Make entrypoint script executable +RUN chmod +x /code/docker-entrypoint.sh + +ENTRYPOINT ["/code/docker-entrypoint.sh"] +CMD ["uvicorn", "DataSpace.asgi:application", "--host", "0.0.0.0", "--port", "8000"] diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml index 70e8b9b2..cf3a4e32 100644 --- a/aws/cloudformation/dataspace-infrastructure.yml +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -316,6 +316,39 @@ Resources: Value: !Sub 'https://dataspace-${Environment}.yourdomain.com' Description: URL whitelist + MigrationsFileSystem: + Type: AWS::EFS::FileSystem + Properties: + PerformanceMode: generalPurpose + Encrypted: true + FileSystemTags: + - Key: Name + Value: {"Fn::Sub": "${AWS::StackName}-migrations"} + + MigrationsAccessPoint: + Type: AWS::EFS::AccessPoint + Properties: + FileSystemId: {"Ref": "MigrationsFileSystem"} + PosixUser: + Uid: "1000" + Gid: "1000" + RootDirectory: + Path: "/migrations" + CreationInfo: + OwnerUid: "1000" + OwnerGid: "1000" + Permissions: "755" + + MigrationsFileSystemMountTarget: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: + Ref: MigrationsFileSystem + SubnetId: + "Fn::Select": [0, {"Ref": "SubnetIds"}] + SecurityGroups: + - {"Ref": "ECSSecurityGroup"} + Outputs: ClusterName: Description: ECS Cluster Name @@ -331,8 +364,16 @@ Outputs: RedisEndpoint: Description: Redis endpoint - Value: !GetAtt RedisCluster.RedisEndpoint.Address + Value: {"Fn::GetAtt": ["RedisCluster", "RedisEndpoint.Address"]} TaskExecutionRoleArn: Description: ECS Task Execution Role ARN - Value: !GetAtt ECSTaskExecutionRole.Arn + Value: {"Fn::GetAtt": ["ECSTaskExecutionRole", "Arn"]} + Export: + Name: {"Fn::Sub": "${AWS::StackName}-ECSTaskExecutionRoleArn"} + + MigrationsFileSystemId: + Description: EFS File System ID for migrations + Value: {"Ref": "MigrationsFileSystem"} + Export: + Name: {"Fn::Sub": "${AWS::StackName}-MigrationsFileSystemId"} diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template index fdd73659..0c4bc093 100644 --- a/aws/task-definition.json.template +++ b/aws/task-definition.json.template @@ -1,10 +1,24 @@ { "family": "dataspace", "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}", + "taskRoleArn": "${ECS_EXECUTION_ROLE_ARN}", "networkMode": "awsvpc", "requiresCompatibilities": ["FARGATE"], "cpu": "${CPU_UNITS}", "memory": "${MEMORY_UNITS}", + "volumes": [ + { + "name": "migrations-volume", + "efsVolumeConfiguration": { + "fileSystemId": "${EFS_ID}", + "rootDirectory": "/migrations", + "transitEncryption": "ENABLED", + "authorizationConfig": { + "iam": "ENABLED" + } + } + } + ], "containerDefinitions": [ { "name": "dataspace", @@ -17,6 +31,13 @@ "protocol": "tcp" } ], + "mountPoints": [ + { + "sourceVolume": "migrations-volume", + "containerPath": "/code/api/migrations", + "readOnly": false + } + ], "environment": [ { "name": "DEBUG", "value": "${DEBUG_MODE}" }, { "name": "APP_PORT", "value": "${APP_PORT}" }, @@ -46,7 +67,7 @@ } }, "healthCheck": { - "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 
1"], + "command": ["CMD-SHELL", "/code/healthcheck.sh && curl -f http://localhost:${APP_PORT}/health/ || exit 1"], "interval": 30, "timeout": 5, "retries": 3, From 74ee3d87d840b765875dd2feb5928099ebc2dab1 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 15:26:56 +0530 Subject: [PATCH 21/38] add docker entrypoint --- docker-entrypoint.sh | 64 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 docker-entrypoint.sh diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 00000000..88c38884 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,64 @@ +#!/bin/bash +set -e + +# Wait for database to be ready +echo "Waiting for database..." +python << END +import sys +import time +import psycopg2 +import os + +# Get database connection details from environment variables +host = os.environ.get("DB_HOST", "localhost") +port = os.environ.get("DB_PORT", "5432") +dbname = os.environ.get("DB_NAME", "postgres") +user = os.environ.get("DB_USER", "postgres") +password = os.environ.get("DB_PASSWORD", "postgres") + +# Try to connect to the database +start_time = time.time() +timeout = 30 +while True: + try: + conn = psycopg2.connect( + host=host, + port=port, + dbname=dbname, + user=user, + password=password + ) + conn.close() + print("Database is ready!") + break + except psycopg2.OperationalError as e: + if time.time() - start_time > timeout: + print(f"Could not connect to database after {timeout} seconds: {e}") + sys.exit(1) + print("Waiting for database to be ready...") + time.sleep(2) +END + +# Run makemigrations first to ensure migration files are created +echo "Running makemigrations..." +python manage.py makemigrations --noinput + +# Run migrations +echo "Running migrations..." +python manage.py migrate --noinput + +# Create superuser if needed +if [ "$DJANGO_SUPERUSER_USERNAME" ] && [ "$DJANGO_SUPERUSER_PASSWORD" ] && [ "$DJANGO_SUPERUSER_EMAIL" ]; then + echo "Creating superuser..." + python manage.py createsuperuser --noinput +fi + +# Collect static files +if [ "$COLLECT_STATIC" = "true" ]; then + echo "Collecting static files..." + python manage.py collectstatic --noinput +fi + +# Start server +echo "Starting server..." 
+exec "$@" From 316f472375c033381ed763110b507174bdc0f344 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 15:42:53 +0530 Subject: [PATCH 22/38] add validations for update dataset --- api/schema/dataset_schema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py index 47b9c874..9410b5e5 100644 --- a/api/schema/dataset_schema.py +++ b/api/schema/dataset_schema.py @@ -636,7 +636,10 @@ def update_dataset( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist as e: raise ValueError(f"Dataset with ID {dataset_id} does not exist.") - + if dataset.status != DatasetStatus.DRAFT.value: + raise ValueError(f"Dataset with ID {dataset_id} is not in draft status.") + if update_dataset_input.title == "": + raise ValueError("Title cannot be empty.") if update_dataset_input.title: dataset.title = update_dataset_input.title if update_dataset_input.description: From b3a05f778ec28cb9a195ff5d1e13fe57433ff638 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 15:52:52 +0530 Subject: [PATCH 23/38] add draft validation and expand update_usecase mutation --- api/schema/usecase_schema.py | 59 ++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 6abd010c..eba19833 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -64,6 +64,7 @@ class UseCaseInputPartial: """Input type for use case updates.""" id: str + title: auto slug: auto summary: auto @@ -241,7 +242,7 @@ class Mutation: """Mutations for use cases.""" create_use_case: TypeUseCase = mutations.create(UseCaseInput) - update_use_case: TypeUseCase = mutations.update(UseCaseInputPartial, key_attr="id") + # update_use_case: TypeUseCase = mutations.update(UseCaseInputPartial, key_attr="id") @strawberry_django.mutation( handle_django_errors=True, @@ -326,12 +327,41 @@ def add_update_usecase_metadata( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {usecase_id} does not exist.") + if usecase.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") + if update_metadata_input.tags is not None: _update_usecase_tags(usecase, update_metadata_input.tags) _add_update_usecase_metadata(usecase, metadata_input) _update_usecase_sectors(usecase, update_metadata_input.sectors) return TypeUseCase.from_django(usecase) + @strawberry_django.mutation(handle_django_errors=True) + @trace_resolver( + name="update_use_case", + attributes={"component": "usecase", "operation": "mutation"}, + ) + def update_use_case( + self, info: Info, use_case_input_partial: UseCaseInputPartial + ) -> TypeUseCase: + usecase_id = use_case_input_partial.id + try: + usecase = UseCase.objects.get(id=usecase_id) + except UseCase.DoesNotExist: + raise ValueError(f"UseCase with ID {usecase_id} does not exist.") + + if usecase.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") + + if use_case_input_partial.title == "": + raise ValueError("Title cannot be empty.") + if use_case_input_partial.title is not None: + usecase.title = use_case_input_partial.title + if use_case_input_partial.summary is not None: + usecase.summary = use_case_input_partial.summary + usecase.save() + return TypeUseCase.from_django(usecase) + @strawberry_django.mutation( handle_django_errors=False, extensions=[ @@ -371,6 +401,9 @@ def add_dataset_to_use_case( except 
UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.datasets.add(dataset) use_case.save() return TypeUseCase.from_django(use_case) @@ -384,12 +417,13 @@ def remove_dataset_from_use_case( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist: raise ValueError(f"Dataset with ID {dataset_id} does not exist.") - try: use_case = UseCase.objects.get(id=use_case_id) except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") use_case.datasets.remove(dataset) use_case.save() return TypeUseCase.from_django(use_case) @@ -409,6 +443,9 @@ def update_usecase_datasets( except UseCase.DoesNotExist: raise ValueError(f"Use Case with ID {use_case_id} doesn't exist") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.datasets.set(datasets) use_case.save() return TypeUseCase.from_django(use_case) @@ -487,6 +524,9 @@ def add_contributor_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.add(user) use_case.save() return TypeUseCase.from_django(use_case) @@ -511,6 +551,9 @@ def remove_contributor_from_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.remove(user) use_case.save() return TypeUseCase.from_django(use_case) @@ -545,6 +588,9 @@ def update_usecase_contributors( except UseCase.DoesNotExist: raise ValueError(f"Use Case with ID {use_case_id} doesn't exist") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.set(users) use_case.save() return TypeUseCase.from_django(use_case) @@ -569,6 +615,9 @@ def add_supporting_organization_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Create or get the relationship relationship, created = UseCaseOrganizationRelationship.objects.get_or_create( usecase=use_case, @@ -621,6 +670,9 @@ def add_partner_organization_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Create or get the relationship relationship, created = UseCaseOrganizationRelationship.objects.get_or_create( usecase=use_case, @@ -691,6 +743,9 @@ def update_usecase_organization_relationships( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Clear existing relationships UseCaseOrganizationRelationship.objects.filter(usecase=use_case).delete() From 
e3b55ed139a9100e1a8edcb57ea854d94b19a3d0 Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 15:56:15 +0530
Subject: [PATCH 24/38] use data for update_usecase mutation

---
 api/schema/usecase_schema.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py
index eba19833..e4ce240f 100644
--- a/api/schema/usecase_schema.py
+++ b/api/schema/usecase_schema.py
@@ -341,10 +341,8 @@ def add_update_usecase_metadata(
         name="update_use_case",
         attributes={"component": "usecase", "operation": "mutation"},
     )
-    def update_use_case(
-        self, info: Info, use_case_input_partial: UseCaseInputPartial
-    ) -> TypeUseCase:
-        usecase_id = use_case_input_partial.id
+    def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase:
+        usecase_id = data.id
         try:
             usecase = UseCase.objects.get(id=usecase_id)
         except UseCase.DoesNotExist:
@@ -353,12 +351,12 @@ def update_use_case(
         if usecase.status != UseCaseStatus.DRAFT:
             raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.")

-        if use_case_input_partial.title == "":
+        if data.title == "":
             raise ValueError("Title cannot be empty.")
-        if use_case_input_partial.title is not None:
-            usecase.title = use_case_input_partial.title
-        if use_case_input_partial.summary is not None:
-            usecase.summary = use_case_input_partial.summary
+        if data.title is not None:
+            usecase.title = data.title
+        if data.summary is not None:
+            usecase.summary = data.summary
         usecase.save()
         return TypeUseCase.from_django(usecase)

From 7dc1c281c003d08eed5ea8850189785d2d1174b8 Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 15:58:56 +0530
Subject: [PATCH 25/38] don't handle django errors for update_use_case

---
 api/schema/usecase_schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py
index e4ce240f..a6f9caae 100644
--- a/api/schema/usecase_schema.py
+++ b/api/schema/usecase_schema.py
@@ -336,7 +336,7 @@ def add_update_usecase_metadata(
         _update_usecase_sectors(usecase, update_metadata_input.sectors)
         return TypeUseCase.from_django(usecase)

-    @strawberry_django.mutation(handle_django_errors=True)
+    @strawberry_django.mutation(handle_django_errors=False)
     @trace_resolver(
         name="update_use_case",
         attributes={"component": "usecase", "operation": "mutation"},

From e864fadcdea0f26752db17077d0d12cce2f21e54 Mon Sep 17 00:00:00 2001
From: dc
Date: Thu, 26 Jun 2025 16:01:30 +0530
Subject: [PATCH 26/38] strip text inputs

---
 api/schema/dataset_schema.py | 6 +++---
 api/schema/usecase_schema.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index 9410b5e5..84c4d73a 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -638,12 +638,12 @@ def update_dataset(
             raise ValueError(f"Dataset with ID {dataset_id} does not exist.")
         if dataset.status != DatasetStatus.DRAFT.value:
             raise ValueError(f"Dataset with ID {dataset_id} is not in draft status.")
-        if update_dataset_input.title == "":
+        if update_dataset_input.title.strip() == "":
             raise ValueError("Title cannot be empty.")
         if update_dataset_input.title:
-            dataset.title = update_dataset_input.title
+            dataset.title = update_dataset_input.title.strip()
         if update_dataset_input.description:
-            dataset.description = update_dataset_input.description
+            dataset.description = update_dataset_input.description.strip()
         if update_dataset_input.access_type: 
dataset.access_type = update_dataset_input.access_type if update_dataset_input.license: diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index a6f9caae..291b0192 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -351,12 +351,12 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: if usecase.status != UseCaseStatus.DRAFT: raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") - if data.title == "": + if data.title.strip() == "": raise ValueError("Title cannot be empty.") if data.title is not None: - usecase.title = data.title + usecase.title = data.title.strip() if data.summary is not None: - usecase.summary = data.summary + usecase.summary = data.summary.strip() usecase.save() return TypeUseCase.from_django(usecase) From 139f1cbe370dbbd8b4cdcf75c5f6f7068364ed32 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 16:08:30 +0530 Subject: [PATCH 27/38] add platform_url to usecase --- api/models/UseCase.py | 1 + api/schema/usecase_schema.py | 22 +++++++++++++++++++++- api/types/type_usecase.py | 3 +++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/api/models/UseCase.py b/api/models/UseCase.py index f085e70b..cd4febb5 100644 --- a/api/models/UseCase.py +++ b/api/models/UseCase.py @@ -54,6 +54,7 @@ class UseCase(models.Model): ) started_on = models.DateField(blank=True, null=True) completed_on = models.DateField(blank=True, null=True) + platform_url = models.URLField(blank=True, null=True) def save(self, *args: Any, **kwargs: Any) -> None: if self.title and not self.slug: diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 291b0192..7ddacd6f 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -65,8 +65,14 @@ class UseCaseInputPartial: id: str title: auto - slug: auto summary: auto + platform_url: auto + tags: auto + sectors: auto + started_on: auto + completed_on: auto + logo: auto + running_status: auto @strawberry.type(name="Query") @@ -357,6 +363,20 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: usecase.title = data.title.strip() if data.summary is not None: usecase.summary = data.summary.strip() + if data.platform_url is not None: + usecase.platform_url = data.platform_url.strip() + if data.tags is not None: + _update_usecase_tags(usecase, data.tags) + if data.sectors is not None: + _update_usecase_sectors(usecase, data.sectors) + if data.started_on is not None: + usecase.started_on = data.started_on + if data.completed_on is not None: + usecase.completed_on = data.completed_on + if data.running_status is not None: + usecase.running_status = data.running_status + if data.logo is not None: + usecase.logo = data.logo usecase.save() return TypeUseCase.from_django(usecase) diff --git a/api/types/type_usecase.py b/api/types/type_usecase.py index 32d3c924..5242da21 100644 --- a/api/types/type_usecase.py +++ b/api/types/type_usecase.py @@ -58,6 +58,9 @@ class TypeUseCase(BaseType): organization: Optional[TypeOrganization] = strawberry.field( description="Organization associated with this use case" ) + platform_url: Optional[str] = strawberry.field( + description="URL of the platform where this use case is published" + ) @strawberry.field( description="Check if this use case is created by an individual user." 
From c7afb27ad18e03966deeb1c572bd4d1b5bddb240 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 16:45:23 +0530 Subject: [PATCH 28/38] disable transitEncryption --- aws/cloudformation/dataspace-infrastructure.yml | 9 ++++----- aws/task-definition.json.template | 5 +---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml index cf3a4e32..351445cf 100644 --- a/aws/cloudformation/dataspace-infrastructure.yml +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -339,15 +339,14 @@ Resources: OwnerGid: "1000" Permissions: "755" + # Create EFS mount targets in the first subnet MigrationsFileSystemMountTarget: Type: AWS::EFS::MountTarget Properties: - FileSystemId: - Ref: MigrationsFileSystem - SubnetId: - "Fn::Select": [0, {"Ref": "SubnetIds"}] + FileSystemId: !Ref MigrationsFileSystem + SubnetId: !Select [0, !Ref SubnetIds] SecurityGroups: - - {"Ref": "ECSSecurityGroup"} + - !Ref ECSSecurityGroup Outputs: ClusterName: diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template index 0c4bc093..69fb51c3 100644 --- a/aws/task-definition.json.template +++ b/aws/task-definition.json.template @@ -12,10 +12,7 @@ "efsVolumeConfiguration": { "fileSystemId": "${EFS_ID}", "rootDirectory": "/migrations", - "transitEncryption": "ENABLED", - "authorizationConfig": { - "iam": "ENABLED" - } + "transitEncryption": "DISABLED" } } ], From 4596bfbc29403d4b20295ed57d03316dd17913d2 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 17:34:41 +0530 Subject: [PATCH 29/38] make sure efs volume is mounted --- aws/cloudformation/dataspace-infrastructure.yml | 4 ++++ docker-entrypoint.sh | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml index 351445cf..08a42562 100644 --- a/aws/cloudformation/dataspace-infrastructure.yml +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -374,5 +374,9 @@ Outputs: MigrationsFileSystemId: Description: EFS File System ID for migrations Value: {"Ref": "MigrationsFileSystem"} + + MigrationsAccessPointId: + Description: EFS Access Point ID for migrations + Value: {"Ref": "MigrationsAccessPoint"} Export: Name: {"Fn::Sub": "${AWS::StackName}-MigrationsFileSystemId"} diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 88c38884..05d482cb 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -39,6 +39,12 @@ while True: time.sleep(2) END +# Ensure migrations directory exists with proper permissions +echo "Ensuring migrations directory exists..." +mkdir -p /code/api/migrations +chmod -R 777 /code/api/migrations +touch /code/api/migrations/__init__.py + # Run makemigrations first to ensure migration files are created echo "Running makemigrations..." 
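# (Context: api/migrations/* is gitignored in this repo, so migration files
# exist only at runtime; the task definition mounts a volume at
# /code/api/migrations so the generated files live outside the container
# layer before "migrate" runs below.)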
python manage.py makemigrations --noinput From b0f9aeacbfbb656b7f61007cc4f583c658b4ffb8 Mon Sep 17 00:00:00 2001 From: dc Date: Thu, 26 Jun 2025 17:52:19 +0530 Subject: [PATCH 30/38] use docker volume instead of efs --- aws/task-definition.json.template | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template index 69fb51c3..544bff8c 100644 --- a/aws/task-definition.json.template +++ b/aws/task-definition.json.template @@ -8,12 +8,7 @@ "memory": "${MEMORY_UNITS}", "volumes": [ { - "name": "migrations-volume", - "efsVolumeConfiguration": { - "fileSystemId": "${EFS_ID}", - "rootDirectory": "/migrations", - "transitEncryption": "DISABLED" - } + "name": "migrations-volume" } ], "containerDefinitions": [ From 31b01da61b06efcf335bdb854c799a381b4d53fb Mon Sep 17 00:00:00 2001 From: dc Date: Fri, 27 Jun 2025 13:52:12 +0530 Subject: [PATCH 31/38] remove tags and sector update from update_use_case --- api/schema/usecase_schema.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 7ddacd6f..0e65c848 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -365,10 +365,6 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: usecase.summary = data.summary.strip() if data.platform_url is not None: usecase.platform_url = data.platform_url.strip() - if data.tags is not None: - _update_usecase_tags(usecase, data.tags) - if data.sectors is not None: - _update_usecase_sectors(usecase, data.sectors) if data.started_on is not None: usecase.started_on = data.started_on if data.completed_on is not None: From dfa3e0d6301bc6b6c52757b8efbf19aa85899516 Mon Sep 17 00:00:00 2001 From: dc Date: Fri, 27 Jun 2025 16:33:10 +0530 Subject: [PATCH 32/38] use status value for update --- api/schema/usecase_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 0e65c848..0cbdfd81 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -370,7 +370,7 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: if data.completed_on is not None: usecase.completed_on = data.completed_on if data.running_status is not None: - usecase.running_status = data.running_status + usecase.running_status = data.running_status.value if data.logo is not None: usecase.logo = data.logo usecase.save() From 826dd14bad8127e190eb9319c1fbcfb26b836e4e Mon Sep 17 00:00:00 2001 From: dc Date: Mon, 30 Jun 2025 13:09:36 +0530 Subject: [PATCH 33/38] add checks for title on usecase update --- api/schema/usecase_schema.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 0cbdfd81..d56850a7 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -64,15 +64,15 @@ class UseCaseInputPartial: """Input type for use case updates.""" id: str - title: auto - summary: auto - platform_url: auto - tags: auto - sectors: auto - started_on: auto - completed_on: auto logo: auto running_status: auto + title: Optional[str] = None + summary: Optional[str] = None + platform_url: Optional[str] = None + tags: Optional[List[str]] = None + sectors: Optional[List[uuid.UUID]] = None + started_on: Optional[datetime.date] = None + completed_on: Optional[datetime.date] = None @strawberry.type(name="Query") @@ -357,9 +357,9 @@ def 
update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: if usecase.status != UseCaseStatus.DRAFT: raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") - if data.title.strip() == "": - raise ValueError("Title cannot be empty.") if data.title is not None: + if data.title.strip() == "": + raise ValueError("Title cannot be empty.") usecase.title = data.title.strip() if data.summary is not None: usecase.summary = data.summary.strip() From 07c92a890911852b0e41b24da2f4be43c4dcba00 Mon Sep 17 00:00:00 2001 From: dc Date: Mon, 30 Jun 2025 13:09:36 +0530 Subject: [PATCH 34/38] add checks for title on usecase update --- api/schema/usecase_schema.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index d56850a7..938e38f8 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -10,6 +10,7 @@ import strawberry_django from django.db import models from strawberry import auto +from strawberry.file_uploads import Upload from strawberry.types import Info from strawberry_django.mutations import mutations from strawberry_django.pagination import OffsetPaginationInput @@ -59,13 +60,16 @@ class UpdateUseCaseMetadataInput: sectors: List[uuid.UUID] +use_case_running_status = strawberry.enum(UseCaseStatus) # type: ignore + + @strawberry_django.partial(UseCase, fields="__all__", exclude=["datasets"]) class UseCaseInputPartial: """Input type for use case updates.""" id: str - logo: auto - running_status: auto + logo: Optional[Upload] = strawberry.field(default=None) + running_status: Optional[use_case_running_status] = UseCaseStatus.DRAFT title: Optional[str] = None summary: Optional[str] = None platform_url: Optional[str] = None @@ -367,11 +371,14 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: usecase.platform_url = data.platform_url.strip() if data.started_on is not None: usecase.started_on = data.started_on - if data.completed_on is not None: + if data.completed_on is not None and data.completed_on is not strawberry.UNSET: usecase.completed_on = data.completed_on - if data.running_status is not None: - usecase.running_status = data.running_status.value - if data.logo is not None: + if ( + data.running_status is not None + and data.running_status is not strawberry.UNSET + ): + usecase.running_status = data.running_status + if data.logo is not None and data.logo is not strawberry.UNSET: usecase.logo = data.logo usecase.save() return TypeUseCase.from_django(usecase) From b078c2244ffadb70f22893ee1fb867c2b9712751 Mon Sep 17 00:00:00 2001 From: dc Date: Mon, 30 Jun 2025 13:50:06 +0530 Subject: [PATCH 35/38] fix running status enum --- api/schema/usecase_schema.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 938e38f8..4e896609 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -33,7 +33,11 @@ TypeUseCaseOrganizationRelationship, relationship_type, ) -from api.utils.enums import OrganizationRelationshipType, UseCaseStatus +from api.utils.enums import ( + OrganizationRelationshipType, + UseCaseRunningStatus, + UseCaseStatus, +) from api.utils.graphql_telemetry import trace_resolver from authorization.models import User from authorization.types import TypeUser @@ -60,7 +64,7 @@ class UpdateUseCaseMetadataInput: sectors: List[uuid.UUID] -use_case_running_status = strawberry.enum(UseCaseStatus) # type: 
ignore
+use_case_running_status = strawberry.enum(UseCaseRunningStatus)  # type: ignore


 @strawberry_django.partial(UseCase, fields="__all__", exclude=["datasets"])
@@ -69,7 +73,7 @@ class UseCaseInputPartial:

     id: str
     logo: Optional[Upload] = strawberry.field(default=None)
-    running_status: Optional[use_case_running_status] = UseCaseStatus.DRAFT
+    running_status: Optional[use_case_running_status] = UseCaseRunningStatus.INITIATED
     title: Optional[str] = None
     summary: Optional[str] = None
     platform_url: Optional[str] = None

From 9b79387ddfb7faf0f7a4d69a16928dd4a0479207 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 30 Jun 2025 14:16:06 +0530
Subject: [PATCH 36/38] add composite sources to separate metadata fields

---
 api/views/paginated_elastic_view.py | 2 +-
 api/views/search_dataset.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/api/views/paginated_elastic_view.py b/api/views/paginated_elastic_view.py
index a44a5564..93051758 100644
--- a/api/views/paginated_elastic_view.py
+++ b/api/views/paginated_elastic_view.py
@@ -91,7 +91,7 @@ def get(self, request: HttpRequest) -> Response:
             aggregations.pop("metadata")
             for agg in metadata_aggregations:
                 label: str = agg["key"]["metadata_label"]
-                value: str = agg["key"]["metadata_value"]
+                value: str = agg["key"].get("metadata_value", "")
                 if label not in aggregations:
                     aggregations[label] = {}
                 aggregations[label][value] = agg["doc_count"]

diff --git a/api/views/search_dataset.py b/api/views/search_dataset.py
index 9e8d269a..a1231c85 100644
--- a/api/views/search_dataset.py
+++ b/api/views/search_dataset.py
@@ -169,7 +169,12 @@ def add_aggregations(self, search: Search) -> Search:
         metadata_bucket = search.aggs.bucket("metadata", "nested", path="metadata")

         composite_sources = [
-            {"metadata": {"terms": {"field": "metadata.label.keyword"}}}
+            {
+                "metadata_label": {
+                    "terms": {"field": "metadata.metadata_item.label"}
+                }
+            },
+            {"metadata_value": {"terms": {"field": "metadata.value"}}},
         ]
         composite_agg = A(
             "composite",

From d395d30f14ad52ec2802a0ef6110e9de684a61c3 Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 30 Jun 2025 16:48:08 +0530
Subject: [PATCH 37/38] add active sectors query

---
 api/schema/sector_schema.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/api/schema/sector_schema.py b/api/schema/sector_schema.py
index d89ab1d8..f32aee6a 100644
--- a/api/schema/sector_schema.py
+++ b/api/schema/sector_schema.py
@@ -9,6 +9,7 @@

 from api.models import Sector
 from api.types.type_sector import TypeSector
+from api.utils.enums import DatasetStatus


 @strawberry.input
@@ -40,6 +41,12 @@ def sector(self, info: Info, id: uuid.UUID) -> Optional[TypeSector]:
         except Sector.DoesNotExist:
             raise ValueError(f"Sector with ID {id} does not exist.")

+    @strawberry_django.field
+    def active_sectors(self, info: Info) -> list[TypeSector]:
+        """Get sectors with published datasets."""
+        queryset = Sector.objects.filter(datasets__status=DatasetStatus.PUBLISHED)
+        return TypeSector.from_django_list(queryset)
+

 @strawberry.type
 class Mutation:

From 9c595a94cc0ff699168fdc193c13201fb16be13d Mon Sep 17 00:00:00 2001
From: dc
Date: Mon, 30 Jun 2025 17:06:16 +0530
Subject: [PATCH 38/38] add order, pagination and filter to active sectors query

---
 api/schema/sector_schema.py | 40 +++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/api/schema/sector_schema.py b/api/schema/sector_schema.py
index f32aee6a..497063e4 100644
--- a/api/schema/sector_schema.py
+++ b/api/schema/sector_schema.py
@@ -1,14 +1,15 @@ 
import uuid -from typing import List, Optional +from typing import Any, List, Optional import strawberry import strawberry_django from strawberry import auto from strawberry.types import Info from strawberry_django.mutations import mutations +from strawberry_django.pagination import OffsetPaginationInput from api.models import Sector -from api.types.type_sector import TypeSector +from api.types.type_sector import SectorFilter, SectorOrder, TypeSector from api.utils.enums import DatasetStatus @@ -41,11 +42,38 @@ def sector(self, info: Info, id: uuid.UUID) -> Optional[TypeSector]: except Sector.DoesNotExist: raise ValueError(f"Sector with ID {id} does not exist.") - @strawberry_django.field - def active_sectors(self, info: Info) -> list[TypeSector]: + @strawberry_django.field( + filters=SectorFilter, + pagination=True, + order=SectorOrder, + ) + def active_sectors( + self, + info: Info, + filters: Optional[SectorFilter] = strawberry.UNSET, + pagination: Optional[OffsetPaginationInput] = strawberry.UNSET, + order: Optional[SectorOrder] = strawberry.UNSET, + ) -> list[TypeSector]: """Get sectors with published datasets.""" - queryset = Sector.objects.filter(datasets__status=DatasetStatus.PUBLISHED) - return TypeSector.from_django_list(queryset) + # Start with base queryset filtering for active sectors + queryset = Sector.objects.filter( + datasets__status=DatasetStatus.PUBLISHED + ).distinct() + + # Apply filters if provided + if filters is not strawberry.UNSET: + queryset = strawberry_django.filters.apply(filters, queryset, info) + + # Apply ordering if provided + if order is not strawberry.UNSET: + queryset = strawberry_django.ordering.apply(order, queryset, info) + + # Apply pagination if provided + if pagination is not strawberry.UNSET: + # Apply pagination to the list + queryset = strawberry_django.pagination.apply(pagination, queryset) + + return [TypeSector.from_django(instance) for instance in queryset] @strawberry.type
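# Example client query for active_sectors above -- a sketch only: it assumes
# strawberry's default camelCase exposure ("activeSectors") and that
# SectorFilter / SectorOrder expose a "name" field, neither of which is shown
# in this patch:
#
#   query {
#     activeSectors(
#       filters: { name: { iContains: "health" } }
#       pagination: { offset: 0, limit: 10 }
#       order: { name: ASC }
#     ) {
#       id
#       name
#     }
#   }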