From fba49a54919a10479028625d37ad06b31d30ff53 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Mon, 2 Feb 2026 23:54:39 -0500 Subject: [PATCH 01/12] fix: Add timeout configuration for MCP servers Add configurable Istio VirtualService timeout for MCP servers to prevent 30-second default timeout errors on long-running operations. - MCP servers (passthrough forwarder with /mcp routes): 5-minute timeout - All other endpoints: explicit 30-second timeout (Istio default) Always sets an explicit timeout value to avoid YAML formatting issues. --- .../templates/service_template_config_map.yaml | 1 + .../gateways/resources/k8s_resource_types.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/charts/model-engine/templates/service_template_config_map.yaml b/charts/model-engine/templates/service_template_config_map.yaml index 80d210a0..f414e481 100644 --- a/charts/model-engine/templates/service_template_config_map.yaml +++ b/charts/model-engine/templates/service_template_config_map.yaml @@ -941,6 +941,7 @@ data: host: "${RESOURCE_NAME}.${NAMESPACE}.svc.cluster.local" port: number: 80 + ${MCP_TIMEOUT} {{- end }} {{- if .Values.destinationrule.enabled }} destination-rule.yaml: |- diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 03c99cd2..d1d804f2 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,6 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str + MCP_TIMEOUT: str # "timeout: 30s" (default) or "timeout: 300s" (MCP servers) class LwsServiceEntryArguments(_BaseEndpointArguments): @@ -1361,6 +1362,22 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": + # Set 5-minute timeout for MCP servers to fix 30-second default timeout issue + # MCP servers use passthrough forwarder and have routes containing /mcp + is_mcp_server = False + if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": + all_routes = [] + if flavor.predict_route: + all_routes.append(flavor.predict_route) + if flavor.routes: + all_routes.extend(flavor.routes) + if flavor.extra_routes: + all_routes.extend(flavor.extra_routes) + is_mcp_server = any("/mcp" in route.lower() for route in all_routes) + # Always set explicit timeout to avoid empty string YAML formatting issues + # MCP servers get 5 minutes, others get explicit 30s default + timeout = "timeout: 300s" if is_mcp_server else "timeout: 30s" + return VirtualServiceArguments( # Base resource arguments RESOURCE_NAME=k8s_resource_group_name, @@ -1373,6 +1390,7 @@ def get_endpoint_resource_arguments_from_request( OWNER=owner, GIT_TAG=GIT_TAG, DNS_HOST_DOMAIN=infra_config().dns_host_domain, + MCP_TIMEOUT=timeout, ) elif endpoint_resource_name == "destination-rule": return DestinationRuleArguments( From e9d0586034bf814021076cd736e435f00628dde2 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Mon, 2 Feb 2026 23:56:56 -0500 Subject: [PATCH 02/12] fix: Add configurable timeout for Istio VirtualService Add request_timeout_seconds field to RunnableImageLike flavor to allow configurable Istio VirtualService timeout per endpoint. - Add request_timeout_seconds: Optional[int] field to RunnableImageLike - Add database column and migration - Update VirtualService logic to use configurable timeout instead of MCP detection heuristic - Defaults to 30s (Istio default) when not specified This allows any endpoint (not just MCP servers) to configure a custom timeout by setting request_timeout_seconds in their flavor config. --- ...f04b2bc9af4_add_request_timeout_seconds.py | 32 +++++++++++++++++++ .../db/models/hosted_model_inference.py | 3 ++ .../domain/entities/model_bundle_entity.py | 1 + .../gateways/resources/k8s_resource_types.py | 21 ++++-------- .../db_model_bundle_repository.py | 2 ++ 5 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py diff --git a/model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py b/model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py new file mode 100644 index 00000000..53f8ef01 --- /dev/null +++ b/model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py @@ -0,0 +1,32 @@ +"""add request timeout seconds column + +Revision ID: 6f04b2bc9af4 +Revises: 221aa19d3f32 +Create Date: 2026-02-02 23:56:00.000000 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = '6f04b2bc9af4' +down_revision = '221aa19d3f32' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + 'bundles', + sa.Column('runnable_image_request_timeout_seconds', sa.Integer(), nullable=True), + schema='hosted_model_inference', + ) + + +def downgrade() -> None: + op.drop_column( + 'bundles', + 'runnable_image_request_timeout_seconds', + schema='hosted_model_inference', + ) + diff --git a/model-engine/model_engine_server/db/models/hosted_model_inference.py b/model-engine/model_engine_server/db/models/hosted_model_inference.py index c5b8247f..968b7179 100644 --- a/model-engine/model_engine_server/db/models/hosted_model_inference.py +++ b/model-engine/model_engine_server/db/models/hosted_model_inference.py @@ -151,6 +151,7 @@ class Bundle(Base): runnable_image_forwarder_type = Column(Text, nullable=True) runnable_image_worker_command = Column(ARRAY(Text), nullable=True) runnable_image_worker_env = Column(JSON, nullable=True) + runnable_image_request_timeout_seconds = Column(Integer, nullable=True) # Streaming Enhanced Runnable Image fields streaming_enhanced_runnable_image_streaming_command = Column(ARRAY(Text), nullable=True) @@ -215,6 +216,7 @@ def __init__( runnable_image_forwarder_type: Optional[str] = None, runnable_image_worker_command: Optional[List[str]] = None, runnable_image_worker_env: Optional[Dict[str, Any]] = None, + runnable_image_request_timeout_seconds: Optional[int] = None, # Streaming Enhanced Runnable Image fields streaming_enhanced_runnable_image_streaming_command: Optional[List[str]] = None, streaming_enhanced_runnable_image_streaming_predict_route: Optional[str] = None, @@ -275,6 +277,7 @@ def __init__( self.runnable_image_forwarder_type = runnable_image_forwarder_type self.runnable_image_worker_command = runnable_image_worker_command self.runnable_image_worker_env = runnable_image_worker_env + self.runnable_image_request_timeout_seconds = runnable_image_request_timeout_seconds self.runnable_image_readiness_initial_delay_seconds = ( runnable_image_readiness_initial_delay_seconds ) diff --git a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py index 2a5a4863..40153217 100644 --- a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py @@ -167,6 +167,7 @@ class RunnableImageLike(BaseModel, ABC): forwarder_type: Optional[str] = ForwarderType.DEFAULT.value worker_command: Optional[List[str]] = None worker_env: Optional[Dict[str, str]] = None + request_timeout_seconds: Optional[int] = None # Istio VirtualService timeout (None = 30s default) class RunnableImageFlavor(RunnableImageLike): diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index d1d804f2..f3ed1f7c 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # "timeout: 30s" (default) or "timeout: 300s" (MCP servers) + MCP_TIMEOUT: str # Istio VirtualService timeout, e.g. "timeout: 30s" (default) or "timeout: 300s" class LwsServiceEntryArguments(_BaseEndpointArguments): @@ -1362,21 +1362,12 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": - # Set 5-minute timeout for MCP servers to fix 30-second default timeout issue - # MCP servers use passthrough forwarder and have routes containing /mcp - is_mcp_server = False - if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": - all_routes = [] - if flavor.predict_route: - all_routes.append(flavor.predict_route) - if flavor.routes: - all_routes.extend(flavor.routes) - if flavor.extra_routes: - all_routes.extend(flavor.extra_routes) - is_mcp_server = any("/mcp" in route.lower() for route in all_routes) + # Use configurable timeout from flavor, defaulting to 30s (Istio default) # Always set explicit timeout to avoid empty string YAML formatting issues - # MCP servers get 5 minutes, others get explicit 30s default - timeout = "timeout: 300s" if is_mcp_server else "timeout: 30s" + timeout_seconds = 30 # Istio default + if isinstance(flavor, RunnableImageLike) and flavor.request_timeout_seconds is not None: + timeout_seconds = flavor.request_timeout_seconds + timeout = f"timeout: {timeout_seconds}s" return VirtualServiceArguments( # Base resource arguments diff --git a/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py b/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py index 7072d2ca..0b3eaffd 100644 --- a/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py +++ b/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py @@ -151,6 +151,7 @@ def translate_model_bundle_orm_to_model_bundle( forwarder_type=model_bundle_orm.runnable_image_forwarder_type, worker_command=model_bundle_orm.runnable_image_worker_command, worker_env=model_bundle_orm.runnable_image_worker_env, + request_timeout_seconds=model_bundle_orm.runnable_image_request_timeout_seconds, streaming_command=model_bundle_orm.streaming_enhanced_runnable_image_streaming_command, streaming_predict_route=model_bundle_orm.streaming_enhanced_runnable_image_streaming_predict_route, triton_model_repository=model_bundle_orm.triton_enhanced_runnable_image_model_repository, @@ -224,6 +225,7 @@ def translate_kwargs_to_model_bundle_orm( runnable_image_forwarder_type=flavor_dict.get("forwarder_type"), runnable_image_worker_command=flavor_dict.get("worker_command"), runnable_image_worker_env=flavor_dict.get("worker_env"), + runnable_image_request_timeout_seconds=flavor_dict.get("request_timeout_seconds"), streaming_enhanced_runnable_image_streaming_command=flavor_dict.get("streaming_command"), streaming_enhanced_runnable_image_streaming_predict_route=flavor_dict.get( "streaming_predict_route" From 60fc061248b34d68e1e075f00350b2b33bbeba46 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Mon, 2 Feb 2026 23:57:54 -0500 Subject: [PATCH 03/12] fix: Restore MCP detection, make timeout configurable - Restore MCP server detection logic (passthrough forwarder + /mcp routes) - MCP servers: use request_timeout_seconds if set, otherwise 30s - Non-MCP servers: do nothing (empty string, use Istio default) --- .../gateways/resources/k8s_resource_types.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index f3ed1f7c..68069c19 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # Istio VirtualService timeout, e.g. "timeout: 30s" (default) or "timeout: 300s" + MCP_TIMEOUT: str # "" for non-MCP servers (use Istio default), or "timeout: Xs" for MCP servers class LwsServiceEntryArguments(_BaseEndpointArguments): @@ -1362,12 +1362,23 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": - # Use configurable timeout from flavor, defaulting to 30s (Istio default) - # Always set explicit timeout to avoid empty string YAML formatting issues - timeout_seconds = 30 # Istio default - if isinstance(flavor, RunnableImageLike) and flavor.request_timeout_seconds is not None: - timeout_seconds = flavor.request_timeout_seconds - timeout = f"timeout: {timeout_seconds}s" + # Set timeout for MCP servers only + # MCP servers use passthrough forwarder and have routes containing /mcp + timeout = "" # Default: no timeout set, use Istio default + if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": + all_routes = [] + if flavor.predict_route: + all_routes.append(flavor.predict_route) + if flavor.routes: + all_routes.extend(flavor.routes) + if flavor.extra_routes: + all_routes.extend(flavor.extra_routes) + is_mcp_server = any("/mcp" in route.lower() for route in all_routes) + + if is_mcp_server: + # Use configurable timeout if set, otherwise default to 30s + timeout_seconds = flavor.request_timeout_seconds if flavor.request_timeout_seconds is not None else 30 + timeout = f"timeout: {timeout_seconds}s" return VirtualServiceArguments( # Base resource arguments From 2fd52d0963ac6312fca62f32b6225dbe82353994 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Mon, 2 Feb 2026 23:58:43 -0500 Subject: [PATCH 04/12] refactor: Remove database migration - timeout doesn't need DB persistence request_timeout_seconds is a runtime configuration field that doesn't need to be persisted in the database. It can be set via API when creating/updating bundles but won't be stored in DB columns. --- ...f04b2bc9af4_add_request_timeout_seconds.py | 32 ------------------- 1 file changed, 32 deletions(-) delete mode 100644 model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py diff --git a/model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py b/model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py deleted file mode 100644 index 53f8ef01..00000000 --- a/model-engine/model_engine_server/db/migrations/alembic/versions/2026_02_02_2356-6f04b2bc9af4_add_request_timeout_seconds.py +++ /dev/null @@ -1,32 +0,0 @@ -"""add request timeout seconds column - -Revision ID: 6f04b2bc9af4 -Revises: 221aa19d3f32 -Create Date: 2026-02-02 23:56:00.000000 - -""" -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision = '6f04b2bc9af4' -down_revision = '221aa19d3f32' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - op.add_column( - 'bundles', - sa.Column('runnable_image_request_timeout_seconds', sa.Integer(), nullable=True), - schema='hosted_model_inference', - ) - - -def downgrade() -> None: - op.drop_column( - 'bundles', - 'runnable_image_request_timeout_seconds', - schema='hosted_model_inference', - ) - From 3f4123e885d00aeef2df4f60bf94b4c264aae704 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:00:39 -0500 Subject: [PATCH 05/12] refactor: Use global env var for MCP timeout instead of DB persistence - Add MCP_TIMEOUT_SECONDS env var (defaults to 30s) - Remove request_timeout_seconds from RunnableImageLike model - Remove all database model changes and translation logic - MCP servers use MCP_TIMEOUT_SECONDS env var for timeout - Non-MCP servers use Istio default (no timeout set) --- model-engine/model_engine_server/common/env_vars.py | 4 ++++ .../model_engine_server/db/models/hosted_model_inference.py | 3 --- .../domain/entities/model_bundle_entity.py | 1 - .../infra/gateways/resources/k8s_resource_types.py | 6 ++---- .../infra/repositories/db_model_bundle_repository.py | 2 -- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/model-engine/model_engine_server/common/env_vars.py b/model-engine/model_engine_server/common/env_vars.py index 2a69cbff..2ae8ff85 100644 --- a/model-engine/model_engine_server/common/env_vars.py +++ b/model-engine/model_engine_server/common/env_vars.py @@ -16,6 +16,7 @@ "LAUNCH_SERVICE_TEMPLATE_CONFIG_MAP_PATH", "LAUNCH_SERVICE_TEMPLATE_FOLDER", "LOCAL", + "MCP_TIMEOUT_SECONDS", "SKIP_AUTH", "WORKSPACE", "get_boolean_env_var", @@ -78,3 +79,6 @@ def get_boolean_env_var(name: str) -> bool: GIT_TAG: str = os.environ.get("GIT_TAG", "GIT_TAG_NOT_FOUND") if GIT_TAG == "GIT_TAG_NOT_FOUND" and "pytest" not in sys.modules: raise ValueError("GIT_TAG environment variable must be set") + +MCP_TIMEOUT_SECONDS: int = int(os.environ.get("MCP_TIMEOUT_SECONDS", "30")) +"""Timeout in seconds for MCP server Istio VirtualService. Defaults to 30 seconds.""" diff --git a/model-engine/model_engine_server/db/models/hosted_model_inference.py b/model-engine/model_engine_server/db/models/hosted_model_inference.py index 968b7179..c5b8247f 100644 --- a/model-engine/model_engine_server/db/models/hosted_model_inference.py +++ b/model-engine/model_engine_server/db/models/hosted_model_inference.py @@ -151,7 +151,6 @@ class Bundle(Base): runnable_image_forwarder_type = Column(Text, nullable=True) runnable_image_worker_command = Column(ARRAY(Text), nullable=True) runnable_image_worker_env = Column(JSON, nullable=True) - runnable_image_request_timeout_seconds = Column(Integer, nullable=True) # Streaming Enhanced Runnable Image fields streaming_enhanced_runnable_image_streaming_command = Column(ARRAY(Text), nullable=True) @@ -216,7 +215,6 @@ def __init__( runnable_image_forwarder_type: Optional[str] = None, runnable_image_worker_command: Optional[List[str]] = None, runnable_image_worker_env: Optional[Dict[str, Any]] = None, - runnable_image_request_timeout_seconds: Optional[int] = None, # Streaming Enhanced Runnable Image fields streaming_enhanced_runnable_image_streaming_command: Optional[List[str]] = None, streaming_enhanced_runnable_image_streaming_predict_route: Optional[str] = None, @@ -277,7 +275,6 @@ def __init__( self.runnable_image_forwarder_type = runnable_image_forwarder_type self.runnable_image_worker_command = runnable_image_worker_command self.runnable_image_worker_env = runnable_image_worker_env - self.runnable_image_request_timeout_seconds = runnable_image_request_timeout_seconds self.runnable_image_readiness_initial_delay_seconds = ( runnable_image_readiness_initial_delay_seconds ) diff --git a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py index 40153217..2a5a4863 100644 --- a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py @@ -167,7 +167,6 @@ class RunnableImageLike(BaseModel, ABC): forwarder_type: Optional[str] = ForwarderType.DEFAULT.value worker_command: Optional[List[str]] = None worker_env: Optional[Dict[str, str]] = None - request_timeout_seconds: Optional[int] = None # Istio VirtualService timeout (None = 30s default) class RunnableImageFlavor(RunnableImageLike): diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 68069c19..6207161c 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -6,7 +6,7 @@ from model_engine_server.common.config import hmi_config from model_engine_server.common.dtos.model_endpoints import BrokerName, BrokerType from model_engine_server.common.dtos.resource_manager import CreateOrUpdateResourcesRequest -from model_engine_server.common.env_vars import CIRCLECI, GIT_TAG +from model_engine_server.common.env_vars import CIRCLECI, GIT_TAG, MCP_TIMEOUT_SECONDS from model_engine_server.common.resource_limits import ( FORWARDER_CPU_USAGE, FORWARDER_MEMORY_USAGE, @@ -1376,9 +1376,7 @@ def get_endpoint_resource_arguments_from_request( is_mcp_server = any("/mcp" in route.lower() for route in all_routes) if is_mcp_server: - # Use configurable timeout if set, otherwise default to 30s - timeout_seconds = flavor.request_timeout_seconds if flavor.request_timeout_seconds is not None else 30 - timeout = f"timeout: {timeout_seconds}s" + timeout = f"timeout: {MCP_TIMEOUT_SECONDS}s" return VirtualServiceArguments( # Base resource arguments diff --git a/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py b/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py index 0b3eaffd..7072d2ca 100644 --- a/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py +++ b/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py @@ -151,7 +151,6 @@ def translate_model_bundle_orm_to_model_bundle( forwarder_type=model_bundle_orm.runnable_image_forwarder_type, worker_command=model_bundle_orm.runnable_image_worker_command, worker_env=model_bundle_orm.runnable_image_worker_env, - request_timeout_seconds=model_bundle_orm.runnable_image_request_timeout_seconds, streaming_command=model_bundle_orm.streaming_enhanced_runnable_image_streaming_command, streaming_predict_route=model_bundle_orm.streaming_enhanced_runnable_image_streaming_predict_route, triton_model_repository=model_bundle_orm.triton_enhanced_runnable_image_model_repository, @@ -225,7 +224,6 @@ def translate_kwargs_to_model_bundle_orm( runnable_image_forwarder_type=flavor_dict.get("forwarder_type"), runnable_image_worker_command=flavor_dict.get("worker_command"), runnable_image_worker_env=flavor_dict.get("worker_env"), - runnable_image_request_timeout_seconds=flavor_dict.get("request_timeout_seconds"), streaming_enhanced_runnable_image_streaming_command=flavor_dict.get("streaming_command"), streaming_enhanced_runnable_image_streaming_predict_route=flavor_dict.get( "streaming_predict_route" From 4ac50f7a7f987fea51425776e11782076604eba3 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:01:39 -0500 Subject: [PATCH 06/12] docs: Update MCP_TIMEOUT comment --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 6207161c..08fd1e72 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # "" for non-MCP servers (use Istio default), or "timeout: Xs" for MCP servers + MCP_TIMEOUT: str # Defaults to 30s, only applies to MCP servers class LwsServiceEntryArguments(_BaseEndpointArguments): From 2d02619f3c9a02c4fac3e0a99c2a4d8d3baec1e2 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:02:35 -0500 Subject: [PATCH 07/12] refactor: Remove redundant comments --- .../infra/gateways/resources/k8s_resource_types.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 08fd1e72..c8f80ddc 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1362,9 +1362,8 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": - # Set timeout for MCP servers only # MCP servers use passthrough forwarder and have routes containing /mcp - timeout = "" # Default: no timeout set, use Istio default + timeout = "" if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": all_routes = [] if flavor.predict_route: From b48bdcbe406e8dfdccf7fc74fbe522c56be83f58 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:11:55 -0500 Subject: [PATCH 08/12] fix: Remove trailing whitespace to pass Black formatting check --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index c8f80ddc..2ae4cbec 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1373,7 +1373,7 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - + if is_mcp_server: timeout = f"timeout: {MCP_TIMEOUT_SECONDS}s" From 1213638f8edf11af27aa4a0c4ba171a88260314d Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:28:50 -0500 Subject: [PATCH 09/12] test: Add tests for MCP timeout in VirtualService arguments --- .../test_k8s_endpoint_resource_delegate.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py index f19e66bc..a93b4e87 100644 --- a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py +++ b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py @@ -28,7 +28,10 @@ DictStrInt, DictStrStr, ResourceArguments, + get_endpoint_resource_arguments_from_request, ) +from model_engine_server.common.env_vars import MCP_TIMEOUT_SECONDS +from model_engine_server.domain.entities import RunnableImageFlavor from tests.unit.infra.gateways.k8s_fake_objects import FakeK8sDeploymentContainer, FakeK8sEnvVar MODULE_PATH = "model_engine_server.infra.gateways.resources.k8s_endpoint_resource_delegate" @@ -976,3 +979,74 @@ def test_add_pod_metadata_env_to_container(): node_name_env = next(e for e in container["env"] if e["name"] == "NODE_NAME") assert node_name_env["valueFrom"]["fieldRef"]["fieldPath"] == "spec.nodeName" + + +def test_virtual_service_mcp_timeout_mcp_server( + create_resources_request_sync_runnable_image: CreateOrUpdateResourcesRequest, +): + """Test that MCP servers get timeout set in VirtualService arguments.""" + # Modify the bundle flavor to be an MCP server (passthrough forwarder with /mcp route) + build_endpoint_request = create_resources_request_sync_runnable_image.build_endpoint_request + model_bundle = build_endpoint_request.model_endpoint_record.current_model_bundle + assert isinstance(model_bundle.flavor, RunnableImageFlavor) + + # Create a new flavor with passthrough forwarder and /mcp route + mcp_flavor = RunnableImageFlavor( + flavor="runnable_image", + repository=model_bundle.flavor.repository, + tag=model_bundle.flavor.tag, + command=model_bundle.flavor.command, + predict_route="/mcp/predict", # Contains /mcp + healthcheck_route=model_bundle.flavor.healthcheck_route, + env=model_bundle.flavor.env, + protocol=model_bundle.flavor.protocol, + readiness_initial_delay_seconds=model_bundle.flavor.readiness_initial_delay_seconds, + forwarder_type="passthrough", # Required for MCP detection + ) + + # Create a new bundle with MCP flavor + mcp_bundle = ModelBundle( + id=model_bundle.id, + name=model_bundle.name, + created_by=model_bundle.created_by, + owner=model_bundle.owner, + created_at=model_bundle.created_at, + model_artifact_ids=model_bundle.model_artifact_ids, + metadata=model_bundle.metadata, + flavor=mcp_flavor, + location=model_bundle.location, + requirements=model_bundle.requirements, + env_params=model_bundle.env_params, + packaging_type=model_bundle.packaging_type, + app_config=model_bundle.app_config, + ) + + # Update the request with MCP bundle + build_endpoint_request.model_endpoint_record.current_model_bundle = mcp_bundle + + # Get virtual service arguments + args = get_endpoint_resource_arguments_from_request( + k8s_resource_group_name="virtual-service", + request=create_resources_request_sync_runnable_image, + sqs_queue_name="test_queue", + sqs_queue_url="https://test_queue", + ) + + # Verify MCP_TIMEOUT is set correctly + assert args["MCP_TIMEOUT"] == f"timeout: {MCP_TIMEOUT_SECONDS}s" + + +def test_virtual_service_mcp_timeout_non_mcp_server( + create_resources_request_sync_runnable_image: CreateOrUpdateResourcesRequest, +): + """Test that non-MCP servers don't get timeout set (use Istio default).""" + # Get virtual service arguments for a regular (non-MCP) server + args = get_endpoint_resource_arguments_from_request( + k8s_resource_group_name="virtual-service", + request=create_resources_request_sync_runnable_image, + sqs_queue_name="test_queue", + sqs_queue_url="https://test_queue", + ) + + # Verify MCP_TIMEOUT is empty (use Istio default) + assert args["MCP_TIMEOUT"] == "" From b2409e133c5cf6eec44ea08831e53f16b1492ebc Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:35:19 -0500 Subject: [PATCH 10/12] fix: Apply black formatting to test file --- .../resources/test_k8s_endpoint_resource_delegate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py index a93b4e87..f8231870 100644 --- a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py +++ b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py @@ -989,7 +989,7 @@ def test_virtual_service_mcp_timeout_mcp_server( build_endpoint_request = create_resources_request_sync_runnable_image.build_endpoint_request model_bundle = build_endpoint_request.model_endpoint_record.current_model_bundle assert isinstance(model_bundle.flavor, RunnableImageFlavor) - + # Create a new flavor with passthrough forwarder and /mcp route mcp_flavor = RunnableImageFlavor( flavor="runnable_image", @@ -1003,7 +1003,7 @@ def test_virtual_service_mcp_timeout_mcp_server( readiness_initial_delay_seconds=model_bundle.flavor.readiness_initial_delay_seconds, forwarder_type="passthrough", # Required for MCP detection ) - + # Create a new bundle with MCP flavor mcp_bundle = ModelBundle( id=model_bundle.id, @@ -1020,10 +1020,10 @@ def test_virtual_service_mcp_timeout_mcp_server( packaging_type=model_bundle.packaging_type, app_config=model_bundle.app_config, ) - + # Update the request with MCP bundle build_endpoint_request.model_endpoint_record.current_model_bundle = mcp_bundle - + # Get virtual service arguments args = get_endpoint_resource_arguments_from_request( k8s_resource_group_name="virtual-service", @@ -1031,7 +1031,7 @@ def test_virtual_service_mcp_timeout_mcp_server( sqs_queue_name="test_queue", sqs_queue_url="https://test_queue", ) - + # Verify MCP_TIMEOUT is set correctly assert args["MCP_TIMEOUT"] == f"timeout: {MCP_TIMEOUT_SECONDS}s" @@ -1047,6 +1047,6 @@ def test_virtual_service_mcp_timeout_non_mcp_server( sqs_queue_name="test_queue", sqs_queue_url="https://test_queue", ) - + # Verify MCP_TIMEOUT is empty (use Istio default) assert args["MCP_TIMEOUT"] == "" From 5ae2fa8b9e7acf96a6c16d1ce6df6764325cc672 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 00:47:29 -0500 Subject: [PATCH 11/12] fix: Fix import order for isort formatting check --- .../resources/test_k8s_endpoint_resource_delegate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py index f8231870..b1ff4094 100644 --- a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py +++ b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py @@ -7,12 +7,13 @@ from kubernetes_asyncio.client.rest import ApiException from model_engine_server.common.config import hmi_config from model_engine_server.common.dtos.resource_manager import CreateOrUpdateResourcesRequest -from model_engine_server.common.env_vars import GIT_TAG +from model_engine_server.common.env_vars import GIT_TAG, MCP_TIMEOUT_SECONDS from model_engine_server.domain.entities import ( ModelBundle, ModelEndpointConfig, ModelEndpointType, ModelEndpointUserConfigState, + RunnableImageFlavor, ) from model_engine_server.domain.exceptions import EndpointResourceInfraException from model_engine_server.infra.gateways.resources.k8s_endpoint_resource_delegate import ( @@ -30,8 +31,6 @@ ResourceArguments, get_endpoint_resource_arguments_from_request, ) -from model_engine_server.common.env_vars import MCP_TIMEOUT_SECONDS -from model_engine_server.domain.entities import RunnableImageFlavor from tests.unit.infra.gateways.k8s_fake_objects import FakeK8sDeploymentContainer, FakeK8sEnvVar MODULE_PATH = "model_engine_server.infra.gateways.resources.k8s_endpoint_resource_delegate" From 422e7327801d64b1a9e8cb84adff454b8acef732 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 3 Feb 2026 01:09:25 -0500 Subject: [PATCH 12/12] fix: Add missing endpoint_resource_name parameter to test function calls --- .../test_k8s_endpoint_resource_delegate.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py index b1ff4094..ab4ae31b 100644 --- a/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py +++ b/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py @@ -1023,12 +1023,17 @@ def test_virtual_service_mcp_timeout_mcp_server( # Update the request with MCP bundle build_endpoint_request.model_endpoint_record.current_model_bundle = mcp_bundle + # Derive k8s_resource_group_name from endpoint_id + endpoint_id = build_endpoint_request.model_endpoint_record.id + k8s_resource_group_name = f"launch-endpoint-id-{endpoint_id}".replace("_", "-") + # Get virtual service arguments args = get_endpoint_resource_arguments_from_request( - k8s_resource_group_name="virtual-service", + k8s_resource_group_name=k8s_resource_group_name, request=create_resources_request_sync_runnable_image, sqs_queue_name="test_queue", sqs_queue_url="https://test_queue", + endpoint_resource_name="virtual-service", ) # Verify MCP_TIMEOUT is set correctly @@ -1039,12 +1044,18 @@ def test_virtual_service_mcp_timeout_non_mcp_server( create_resources_request_sync_runnable_image: CreateOrUpdateResourcesRequest, ): """Test that non-MCP servers don't get timeout set (use Istio default).""" + # Derive k8s_resource_group_name from endpoint_id + build_endpoint_request = create_resources_request_sync_runnable_image.build_endpoint_request + endpoint_id = build_endpoint_request.model_endpoint_record.id + k8s_resource_group_name = f"launch-endpoint-id-{endpoint_id}".replace("_", "-") + # Get virtual service arguments for a regular (non-MCP) server args = get_endpoint_resource_arguments_from_request( - k8s_resource_group_name="virtual-service", + k8s_resource_group_name=k8s_resource_group_name, request=create_resources_request_sync_runnable_image, sqs_queue_name="test_queue", sqs_queue_url="https://test_queue", + endpoint_resource_name="virtual-service", ) # Verify MCP_TIMEOUT is empty (use Istio default)