From 122469b2fc5d21edd5f38bdfe8666fbb69a70105 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Thu, 11 Dec 2025 10:00:11 +0100 Subject: [PATCH 1/8] fix default etl config id --- controller/transfer/cognition/minio_upload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/controller/transfer/cognition/minio_upload.py b/controller/transfer/cognition/minio_upload.py index 005b218e..85c82f1c 100644 --- a/controller/transfer/cognition/minio_upload.py +++ b/controller/transfer/cognition/minio_upload.py @@ -54,9 +54,12 @@ def handle_cognition_file_upload(path_parts: List[str]): ): project_id = file_reference.meta_data.get("project_id") conversation_id = file_reference.meta_data.get("conversation_id") + etl_config_id = file_reference.meta_data.get( + "etl_config_id" + ) or project_db_co.get_default_etl_config_id(project_id) full_config, tokenizer = etl_utils.get_full_config_and_tokenizer_from_config_id( file_reference, - etl_config_id=project_db_co.get_default_etl_config_id(project_id), + etl_config_id=etl_config_id, project_id=project_id, conversation_id=conversation_id, ) From 67ea1477d51f0a4733dbb4bfc9a13c39b1062474 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 11 Dec 2025 20:58:29 +0100 Subject: [PATCH 2/8] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 67d2713a..689d4548 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 67d2713ad19a2f4e4b676c5673f0ac3bfec0d1b3 +Subproject commit 689d45480dfb7cc8ccc6964b3d6469d129812677 From 02644d3bb39152a9e21c4021a6b6130825a6170d Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 11 Dec 2025 20:58:54 +0100 Subject: [PATCH 3/8] perf(alembic): stale etl --- .../cb0c970f42d1_adds_etl_stale_check.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 alembic/versions/cb0c970f42d1_adds_etl_stale_check.py diff --git a/alembic/versions/cb0c970f42d1_adds_etl_stale_check.py b/alembic/versions/cb0c970f42d1_adds_etl_stale_check.py new file mode 100644 index 00000000..737c3370 --- /dev/null +++ b/alembic/versions/cb0c970f42d1_adds_etl_stale_check.py @@ -0,0 +1,49 @@ +"""adds etl stale check + +Revision ID: cb0c970f42d1 +Revises: 04cd434ed6eb +Create Date: 2025-12-11 19:57:52.502994 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "cb0c970f42d1" +down_revision = "04cd434ed6eb" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "etl_task", + sa.Column("full_config_hash", sa.String(), nullable=True), + schema="global", + ) + op.add_column( + "etl_task", sa.Column("is_stale", sa.Boolean(), nullable=True), schema="global" + ) + op.create_index( + op.f("ix_global_etl_task_full_config_hash"), + "etl_task", + ["full_config_hash"], + unique=False, + schema="global", + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index( + op.f("ix_global_etl_task_full_config_hash"), + table_name="etl_task", + schema="global", + ) + op.drop_column("etl_task", "is_stale", schema="global") + op.drop_column("etl_task", "full_config_hash", schema="global") + # ### end Alembic commands ### From fc9cb98a7b85475a110b6a5f5c26139eb8ab46a5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 11 Dec 2025 23:23:06 +0100 Subject: [PATCH 4/8] perf(alembic): add new monitoring attrs --- ...e_check.py => 28a36f227ad7_adds_etl_new_attributes.py} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename alembic/versions/{cb0c970f42d1_adds_etl_stale_check.py => 28a36f227ad7_adds_etl_new_attributes.py} (90%) diff --git a/alembic/versions/cb0c970f42d1_adds_etl_stale_check.py b/alembic/versions/28a36f227ad7_adds_etl_new_attributes.py similarity index 90% rename from alembic/versions/cb0c970f42d1_adds_etl_stale_check.py rename to alembic/versions/28a36f227ad7_adds_etl_new_attributes.py index 737c3370..e428f1f0 100644 --- a/alembic/versions/cb0c970f42d1_adds_etl_stale_check.py +++ b/alembic/versions/28a36f227ad7_adds_etl_new_attributes.py @@ -1,8 +1,8 @@ -"""adds etl stale check +"""adds etl new attributes -Revision ID: cb0c970f42d1 +Revision ID: 28a36f227ad7 Revises: 04cd434ed6eb -Create Date: 2025-12-11 19:57:52.502994 +Create Date: 2025-12-11 22:22:28.935296 """ @@ -11,7 +11,7 @@ # revision identifiers, used by Alembic. -revision = "cb0c970f42d1" +revision = "28a36f227ad7" down_revision = "04cd434ed6eb" branch_labels = None depends_on = None From 7157e9168dce98ba26dc705152458ea38c42d7ec Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 11 Dec 2025 23:35:10 +0100 Subject: [PATCH 5/8] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index 689d4548..eda6ab57 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit 689d45480dfb7cc8ccc6964b3d6469d129812677 +Subproject commit eda6ab5782654d1cb44c580a8ef882766b377899 From 3cd5d7026369130a82c15e774261747f2a10f439 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 11 Dec 2025 23:35:23 +0100 Subject: [PATCH 6/8] perf(alembic): new etl attrs --- ...y => 15f133dd208b_adds_etl_new_attributes.py} | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) rename alembic/versions/{28a36f227ad7_adds_etl_new_attributes.py => 15f133dd208b_adds_etl_new_attributes.py} (71%) diff --git a/alembic/versions/28a36f227ad7_adds_etl_new_attributes.py b/alembic/versions/15f133dd208b_adds_etl_new_attributes.py similarity index 71% rename from alembic/versions/28a36f227ad7_adds_etl_new_attributes.py rename to alembic/versions/15f133dd208b_adds_etl_new_attributes.py index e428f1f0..e1a7d310 100644 --- a/alembic/versions/28a36f227ad7_adds_etl_new_attributes.py +++ b/alembic/versions/15f133dd208b_adds_etl_new_attributes.py @@ -1,8 +1,8 @@ """adds etl new attributes -Revision ID: 28a36f227ad7 +Revision ID: 15f133dd208b Revises: 04cd434ed6eb -Create Date: 2025-12-11 22:22:28.935296 +Create Date: 2025-12-11 22:34:07.966633 """ @@ -11,7 +11,7 @@ # revision identifiers, used by Alembic. -revision = "28a36f227ad7" +revision = "15f133dd208b" down_revision = "04cd434ed6eb" branch_labels = None depends_on = None @@ -27,6 +27,14 @@ def upgrade(): op.add_column( "etl_task", sa.Column("is_stale", sa.Boolean(), nullable=True), schema="global" ) + op.add_column( + "etl_task", sa.Column("llm_ops", sa.JSON(), nullable=True), schema="global" + ) + op.add_column( + "etl_task", + sa.Column("updated_at", sa.DateTime(), nullable=True), + schema="global", + ) op.create_index( op.f("ix_global_etl_task_full_config_hash"), "etl_task", @@ -44,6 +52,8 @@ def downgrade(): table_name="etl_task", schema="global", ) + op.drop_column("etl_task", "updated_at", schema="global") + op.drop_column("etl_task", "llm_ops", schema="global") op.drop_column("etl_task", "is_stale", schema="global") op.drop_column("etl_task", "full_config_hash", schema="global") # ### end Alembic commands ### From 048646cbc5b83c9b677769d7cfc6de5607da82de Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 12 Dec 2025 02:35:19 +0100 Subject: [PATCH 7/8] chore: update submodules --- submodules/model | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/model b/submodules/model index eda6ab57..144dde2a 160000 --- a/submodules/model +++ b/submodules/model @@ -1 +1 @@ -Subproject commit eda6ab5782654d1cb44c580a8ef882766b377899 +Subproject commit 144dde2af3d2c940ee13df161194c4aa76a0c197 From a6e6cfb3028d6acd5e44582e907599e9616aa94f Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Fri, 12 Dec 2025 02:35:41 +0100 Subject: [PATCH 8/8] fix: default etl config id --- controller/transfer/cognition/minio_upload.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/controller/transfer/cognition/minio_upload.py b/controller/transfer/cognition/minio_upload.py index 85c82f1c..005b218e 100644 --- a/controller/transfer/cognition/minio_upload.py +++ b/controller/transfer/cognition/minio_upload.py @@ -54,12 +54,9 @@ def handle_cognition_file_upload(path_parts: List[str]): ): project_id = file_reference.meta_data.get("project_id") conversation_id = file_reference.meta_data.get("conversation_id") - etl_config_id = file_reference.meta_data.get( - "etl_config_id" - ) or project_db_co.get_default_etl_config_id(project_id) full_config, tokenizer = etl_utils.get_full_config_and_tokenizer_from_config_id( file_reference, - etl_config_id=etl_config_id, + etl_config_id=project_db_co.get_default_etl_config_id(project_id), project_id=project_id, conversation_id=conversation_id, )