From a369cf9fc975981a96649aedf20ebbfa17e67f5a Mon Sep 17 00:00:00 2001
From: peterschmidt85
Date: Wed, 18 Feb 2026 14:21:15 +0100
Subject: [PATCH] Clarify why GPU vendor default inference is split between
 client and server; add TODOs on how this should change in the future (move
 resource defaults to the server).

---
 .../cli/services/configurators/run.py      | 19 ++++++++++---------
 .../_internal/server/services/resources.py | 14 +++++++++++---
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/dstack/_internal/cli/services/configurators/run.py b/src/dstack/_internal/cli/services/configurators/run.py
index 0322cac22..6fc427a38 100644
--- a/src/dstack/_internal/cli/services/configurators/run.py
+++ b/src/dstack/_internal/cli/services/configurators/run.py
@@ -391,10 +391,14 @@ def validate_gpu_vendor_and_image(self, conf: RunConfigurationT) -> None:
         Infers GPU vendor if not set. Defaults to Nvidia when using the default CUDA image.
         Requires explicit `image` if the vendor is AMD or Tenstorrent.
 
-        NOTE: We don't set the inferred vendor on gpu_spec for compatibility with
-        older servers. Servers set the vendor using the same logic in
-        set_resources_defaults(). The inferred vendor is used here only for
-        validation and display (see _infer_gpu_vendor).
+        When vendor is inferred from GPU name (e.g. A100 -> nvidia), it is written to
+        gpu_spec. When vendor is inferred from image context (no name, no vendor, default
+        CUDA image -> nvidia), it is NOT written to gpu_spec because 0.19.x servers
+        (gpuhunt <0.1.12) break on vendor=nvidia + min_gpu_count=0. The server applies
+        the same default in set_gpu_vendor_default().
+
+        TODO: This entire method should move to the server (set_resources_defaults)
+        so that defaults and validation are consistent for CLI and API users.
         """
         gpu_spec = conf.resources.gpu
         if gpu_spec is None:
@@ -439,11 +443,8 @@ def validate_gpu_vendor_and_image(self, conf: RunConfigurationT) -> None:
                 # Set vendor inferred from name on the spec (server needs it for filtering).
                 gpu_spec.vendor = vendor
             else:
-                # No vendor or name specified. Default to Nvidia if using the default
-                # CUDA image, since it's only compatible with Nvidia GPUs.
-                # We don't set the inferred vendor on the spec — the server does the
-                # same inference in set_resources_defaults() for compatibility with
-                # older servers that don't handle vendor + count.min=0 correctly.
+                # No vendor or name specified. Default to Nvidia if using the
+                # default CUDA image, since it's only compatible with Nvidia GPUs.
                 if conf.image is None and conf.docker is not True:
                     vendor = gpuhunt.AcceleratorVendor.NVIDIA
                     has_amd_gpu = False
diff --git a/src/dstack/_internal/server/services/resources.py b/src/dstack/_internal/server/services/resources.py
index aab47de21..8b38f92f4 100644
--- a/src/dstack/_internal/server/services/resources.py
+++ b/src/dstack/_internal/server/services/resources.py
@@ -29,9 +29,17 @@ def set_gpu_vendor_default(
     docker: Optional[bool],
 ) -> None:
     """Default GPU vendor to Nvidia when using the default CUDA image,
-    since it's only compatible with Nvidia GPUs.
-    Mirrors the client-side logic in validate_gpu_vendor_and_image().
-    Should only be called for runs (not fleets) since fleets don't have image context."""
+    since it's only compatible with Nvidia GPUs. Only called for runs
+    (not fleets) since fleets don't have image context.
+
+    The client infers the same default for display and validation
+    (see validate_gpu_vendor_and_image) but does not write it to the spec
+    for 0.19.x server compatibility. This server-side function is what
+    actually sets the vendor before offer matching.
+
+    TODO: All resource defaults and validation (gpu vendor, cpu arch, memory,
+    disk, etc.) should be set here on the server, not split between client
+    and model-level defaults."""
     gpu = resources.gpu
     if (
         gpu is not None
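
For reference, the defaulting rule that both docstrings describe can be summarized in a few lines. The sketch below is illustrative only and is not dstack code: GpuSpec, Vendor, and default_gpu_vendor are simplified stand-ins for the real models, and the actual behavior lives in validate_gpu_vendor_and_image() on the client and set_gpu_vendor_default() on the server.

    # Minimal sketch of the defaulting rule, under simplified assumptions.
    # GpuSpec, Vendor, and default_gpu_vendor are hypothetical stand-ins,
    # not dstack's actual models or API.
    from dataclasses import dataclass, field
    from enum import Enum
    from typing import List, Optional


    class Vendor(str, Enum):
        NVIDIA = "nvidia"
        AMD = "amd"
        TENSTORRENT = "tenstorrent"


    @dataclass
    class GpuSpec:
        name: List[str] = field(default_factory=list)  # e.g. ["A100"]
        vendor: Optional[Vendor] = None


    def default_gpu_vendor(
        gpu: Optional[GpuSpec], image: Optional[str], docker: Optional[bool]
    ) -> None:
        # If the user named a GPU (e.g. A100), the vendor is inferred from the
        # name elsewhere; this default only covers the "no name, no vendor" case.
        if gpu is None or gpu.vendor is not None or gpu.name:
            return
        # Default CUDA image (no custom image, docker not enabled) implies Nvidia.
        if image is None and docker is not True:
            gpu.vendor = Vendor.NVIDIA


    # Default image, nothing specified -> nvidia.
    spec = GpuSpec()
    default_gpu_vendor(spec, image=None, docker=None)
    assert spec.vendor == Vendor.NVIDIA

    # Custom image -> vendor stays unset, offers are not restricted.
    spec = GpuSpec()
    default_gpu_vendor(spec, image="rocm/pytorch:latest", docker=None)
    assert spec.vendor is None

Per the TODO in the patch, this rule would eventually run only on the server, so CLI and API submissions get identical defaults.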