Fix probes=None server incompatibility (#3543)

jvstme · web-flow · commit 3149be8ff126 · 2026-02-05T07:39:37.000Z
This fixes server compatibility with clients prior
to 0.20.8 that don't support `probes=None`, by
replacing `None` with `[]` in responses for older
clients. The incompatibility shows up when new
and old clients are used in the same project:
old clients fail when viewing runs submitted by
new clients.
diff --git a/src/dstack/_internal/server/compatibility/runs.py b/src/dstack/_internal/server/compatibility/runs.py
@@ -0,0 +1,36 @@
+from typing import Optional
+
+from packaging.version import Version
+
+from dstack._internal.core.models.configurations import ServiceConfiguration
+from dstack._internal.core.models.runs import Run, RunPlan, RunSpec
+from dstack._internal.server.compatibility.common import patch_offers_list
+
+
+def patch_run_plan(run_plan: RunPlan, client_version: Optional[Version]) -> None:
+    if client_version is None:
+        return
+    patch_run_spec(run_plan.run_spec, client_version)
+    if run_plan.effective_run_spec is not None:
+        patch_run_spec(run_plan.effective_run_spec, client_version)
+    if run_plan.current_resource is not None:
+        patch_run(run_plan.current_resource, client_version)
+    for job_plan in run_plan.job_plans:
+        patch_offers_list(job_plan.offers, client_version)
+
+
+def patch_run(run: Run, client_version: Optional[Version]) -> None:
+    if client_version is None:
+        return
+    patch_run_spec(run.run_spec, client_version)
+
+
+def patch_run_spec(run_spec: RunSpec, client_version: Optional[Version]) -> None:
+    if client_version is None:
+        return
+    # Clients prior to 0.20.8 do not support probes = None
+    if client_version < Version("0.20.8") and isinstance(
+        run_spec.configuration, ServiceConfiguration
+    ):
+        if run_spec.configuration.probes is None:
+            run_spec.configuration.probes = []
diff --git a/src/dstack/_internal/server/routers/runs.py b/src/dstack/_internal/server/routers/runs.py
@@ -6,7 +6,7 @@
 
 from dstack._internal.core.errors import ResourceNotExistsError
 from dstack._internal.core.models.runs import Run, RunPlan
-from dstack._internal.server.compatibility.common import patch_offers_list
+from dstack._internal.server.compatibility.runs import patch_run, patch_run_plan
 from dstack._internal.server.db import get_session
 from dstack._internal.server.models import ProjectModel, UserModel
 from dstack._internal.server.schemas.runs import (
@@ -52,6 +52,7 @@ async def list_runs(
     body: ListRunsRequest,
     session: AsyncSession = Depends(get_session),
     user: UserModel = Depends(Authenticated()),
+    client_version: Optional[Version] = Depends(get_client_version),
 ):
     """
     Returns all runs visible to user sorted by descending `submitted_at`.
@@ -62,22 +63,23 @@ async def list_runs(
     The results are paginated. To get the next page, pass `submitted_at` and `id` of
     the last run from the previous page as `prev_submitted_at` and `prev_run_id`.
     """
-    return CustomORJSONResponse(
-        await runs.list_user_runs(
-            session=session,
-            user=user,
-            project_name=body.project_name,
-            repo_id=body.repo_id,
-            username=body.username,
-            only_active=body.only_active,
-            include_jobs=body.include_jobs,
-            job_submissions_limit=body.job_submissions_limit,
-            prev_submitted_at=body.prev_submitted_at,
-            prev_run_id=body.prev_run_id,
-            limit=body.limit,
-            ascending=body.ascending,
-        )
+    run_list = await runs.list_user_runs(
+        session=session,
+        user=user,
+        project_name=body.project_name,
+        repo_id=body.repo_id,
+        username=body.username,
+        only_active=body.only_active,
+        include_jobs=body.include_jobs,
+        job_submissions_limit=body.job_submissions_limit,
+        prev_submitted_at=body.prev_submitted_at,
+        prev_run_id=body.prev_run_id,
+        limit=body.limit,
+        ascending=body.ascending,
     )
+    for run in run_list:
+        patch_run(run, client_version)
+    return CustomORJSONResponse(run_list)
 
 
 @project_router.post(
@@ -88,6 +90,7 @@ async def get_run(
     body: GetRunRequest,
     session: AsyncSession = Depends(get_session),
     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
+    client_version: Optional[Version] = Depends(get_client_version),
 ):
     """
     Returns a run given `run_name` or `id`.
@@ -103,6 +106,7 @@ async def get_run(
     )
     if run is None:
         raise ResourceNotExistsError("Run not found")
+    patch_run(run, client_version)
     return CustomORJSONResponse(run)
 
 
@@ -132,8 +136,7 @@ async def get_plan(
         max_offers=body.max_offers,
         legacy_repo_dir=legacy_repo_dir,
     )
-    for job_plan in run_plan.job_plans:
-        patch_offers_list(job_plan.offers, client_version)
+    patch_run_plan(run_plan, client_version)
     return CustomORJSONResponse(run_plan)
 
 
@@ -146,6 +149,7 @@ async def apply_plan(
     session: Annotated[AsyncSession, Depends(get_session)],
     user_project: Annotated[tuple[UserModel, ProjectModel], Depends(ProjectMember())],
     legacy_repo_dir: Annotated[bool, Depends(use_legacy_repo_dir)],
+    client_version: Annotated[Optional[Version], Depends(get_client_version)],
 ):
     """
     Creates a new run or updates an existing run.
@@ -156,16 +160,16 @@ async def apply_plan(
     user, project = user_project
     if not user.ssh_public_key and not body.plan.run_spec.ssh_key_pub:
         await users.refresh_ssh_key(session=session, actor=user)
-    return CustomORJSONResponse(
-        await runs.apply_plan(
-            session=session,
-            user=user,
-            project=project,
-            plan=body.plan,
-            force=body.force,
-            legacy_repo_dir=legacy_repo_dir,
-        )
+    run = await runs.apply_plan(
+        session=session,
+        user=user,
+        project=project,
+        plan=body.plan,
+        force=body.force,
+        legacy_repo_dir=legacy_repo_dir,
     )
+    patch_run(run, client_version)
+    return CustomORJSONResponse(run)
 
 
 @project_router.post("/stop")
diff --git a/src/tests/_internal/server/routers/test_runs.py b/src/tests/_internal/server/routers/test_runs.py
@@ -941,6 +941,53 @@ async def test_limits_job_submissions(
             },
         ]
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "client_version,expected_probes",
+        [
+            ("0.20.7", []),
+            ("0.20.8", None),
+            (None, None),
+        ],
+    )
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_patches_service_configuration_probes_for_old_clients(
+        self,
+        test_db,
+        session: AsyncSession,
+        client: AsyncClient,
+        client_version: Optional[str],
+        expected_probes: Optional[list],
+    ) -> None:
+        user = await create_user(session=session)
+        project = await create_project(session=session, owner=user)
+        repo = await create_repo(session=session, project_id=project.id)
+
+        service_conf = ServiceConfiguration(
+            commands=["echo hello"],
+            port=80,
+            probes=None,  # This should be patched to [] for clients prior to 0.20.8
+        )
+        run_spec = get_run_spec(
+            configuration=service_conf,
+            repo_id=repo.name,
+        )
+        await create_run(session=session, project=project, repo=repo, user=user, run_spec=run_spec)
+
+        headers = get_auth_headers(user.token)
+        if client_version is not None:
+            headers["X-API-Version"] = client_version
+        response = await client.post(
+            "/api/runs/list",
+            headers=headers,
+            json={"project_name": project.name},
+        )
+
+        assert response.status_code == 200
+        runs_list = response.json()
+        assert len(runs_list) == 1
+        assert runs_list[0]["run_spec"]["configuration"]["probes"] == expected_probes
+
 
 class TestGetRun:
     @pytest.mark.asyncio
@@ -1020,6 +1067,53 @@ async def test_returns_deleted_run_given_id(
         assert response.status_code == 200, response.json()
         assert response.json()["id"] == str(run.id)
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "client_version,expected_probes",
+        [
+            ("0.20.7", []),
+            ("0.20.8", None),
+            (None, None),
+        ],
+    )
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_patches_service_configuration_probes_for_old_clients(
+        self,
+        test_db,
+        session: AsyncSession,
+        client: AsyncClient,
+        client_version: Optional[str],
+        expected_probes: Optional[list],
+    ) -> None:
+        user = await create_user(session=session)
+        project = await create_project(session=session, owner=user)
+        repo = await create_repo(session=session, project_id=project.id)
+
+        service_conf = ServiceConfiguration(
+            commands=["echo hello"],
+            port=80,
+            probes=None,  # This should be patched to [] for clients prior to 0.20.8
+        )
+        run_spec = get_run_spec(
+            configuration=service_conf,
+            repo_id=repo.name,
+        )
+        run = await create_run(
+            session=session, project=project, repo=repo, user=user, run_spec=run_spec
+        )
+
+        headers = get_auth_headers(user.token)
+        if client_version is not None:
+            headers["X-API-Version"] = client_version
+        response = await client.post(
+            f"/api/project/{project.name}/runs/get",
+            headers=headers,
+            json={"run_name": run.run_name},
+        )
+
+        assert response.status_code == 200
+        assert response.json()["run_spec"]["configuration"]["probes"] == expected_probes
+
 
 class TestGetRunPlan:
     @pytest.mark.asyncio
@@ -1477,6 +1571,65 @@ async def test_generates_user_ssh_key(self, session: AsyncSession, client: Async
         assert user.ssh_public_key == run_spec_ssh_public_key
         assert user.ssh_private_key is not None
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "client_version,expected_probes",
+        [
+            ("0.20.7", []),
+            ("0.20.8", None),
+            (None, None),
+        ],
+    )
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_patches_service_configuration_probes_for_old_clients(
+        self,
+        test_db,
+        session: AsyncSession,
+        client: AsyncClient,
+        client_version: Optional[str],
+        expected_probes: Optional[list],
+    ) -> None:
+        user = await create_user(session=session)
+        project = await create_project(session=session, owner=user)
+        repo = await create_repo(session=session, project_id=project.id)
+
+        service_conf = ServiceConfiguration(
+            commands=["echo hello"],
+            port=80,
+            probes=None,  # This should be patched to [] for clients prior to 0.20.8
+        )
+        run_spec = get_run_spec(
+            run_name="test-service",
+            configuration=service_conf,
+            repo_id=repo.name,
+        )
+        await create_run(
+            session=session,
+            project=project,
+            repo=repo,
+            user=user,
+            run_spec=run_spec,
+            run_name="test-service",
+        )
+
+        body = {"run_spec": run_spec.dict()}
+        headers = get_auth_headers(user.token)
+        if client_version is not None:
+            headers["X-API-Version"] = client_version
+        response = await client.post(
+            f"/api/project/{project.name}/runs/get_plan",
+            headers=headers,
+            json=body,
+        )
+
+        assert response.status_code == 200
+        run_plan = response.json()
+        assert run_plan["run_spec"]["configuration"]["probes"] == expected_probes
+        assert run_plan["effective_run_spec"]["configuration"]["probes"] == expected_probes
+        assert (
+            run_plan["current_resource"]["run_spec"]["configuration"]["probes"] == expected_probes
+        )
+
 
 class TestApplyPlan:
     @pytest.mark.asyncio
@@ -1668,6 +1821,57 @@ async def test_generates_user_ssh_key(self, session: AsyncSession, client: Async
         assert user.ssh_public_key == run_spec_ssh_public_key
         assert user.ssh_private_key is not None
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "client_version,expected_probes",
+        [
+            ("0.20.7", []),  # Prior to 0.20.8, probes=None should be patched to []
+            ("0.20.8", None),  # 0.20.8 and later should keep probes=None
+            (None, None),  # None client version should keep probes=None
+        ],
+    )
+    @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+    async def test_patches_service_configuration_probes_for_old_clients(
+        self,
+        test_db,
+        session: AsyncSession,
+        client: AsyncClient,
+        client_version: Optional[str],
+        expected_probes: Optional[list],
+    ) -> None:
+        user = await create_user(session=session)
+        project = await create_project(session=session, owner=user)
+        repo = await create_repo(session=session, project_id=project.id)
+
+        service_conf = ServiceConfiguration(
+            commands=["echo hello"],
+            port=80,
+            probes=None,  # This should be patched to [] for clients prior to 0.20.8
+        )
+        run_spec = get_run_spec(
+            run_name="test-service",
+            configuration=service_conf,
+            repo_id=repo.name,
+        )
+
+        headers = get_auth_headers(user.token)
+        if client_version is not None:
+            headers["X-API-Version"] = client_version
+        response = await client.post(
+            f"/api/project/{project.name}/runs/apply",
+            headers=headers,
+            json={
+                "plan": {
+                    "run_spec": run_spec.dict(),
+                    "current_resource": None,
+                },
+                "force": False,
+            },
+        )
+
+        assert response.status_code == 200
+        assert response.json()["run_spec"]["configuration"]["probes"] == expected_probes
+
 
 class TestSubmitRun:
     @pytest.mark.asyncio