[BugFix] Fix DeepSeek-R1 hang with DP and MTP (#30119)

LucasWilkinson · tlrmchlsmth · MatthewBonanni · web-flow · commit 95501a70ec69 · 2025-12-09T18:51:19.000Z
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -4168,10 +4168,19 @@ def _dummy_run(
 
             if self.speculative_config and self.speculative_config.use_eagle():
                 assert isinstance(self.drafter, EagleProposer)
+                # Eagle currently only supports PIECEWISE cudagraphs: when capturing,
+                # require PIECEWISE mode; otherwise allow any mode other than NONE.
+                # NOTE(lucas): this is a hack, need to clean up.
                 use_cudagraphs = (
-                    cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
-                    and not self.speculative_config.enforce_eager
-                )
+                    (
+                        is_graph_capturing
+                        and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
+                    )
+                    or (
+                        not is_graph_capturing
+                        and cudagraph_runtime_mode != CUDAGraphMode.NONE
+                    )
+                ) and not self.speculative_config.enforce_eager
 
                 # Note(gnovack) - We need to disable cudagraphs for one of the two
                 # lora cases when cudagraph_specialize_lora is enabled. This is a