Skip to content

Commit 95501a7

Browse files
LucasWilkinson, tlrmchlsmth, MatthewBonanni
authored
[BugFix] Fix DeepSeek-R1 hang with DP and MTP (#30119)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
1 parent e858bfe commit 95501a7

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4168,10 +4168,19 @@ def _dummy_run(
41684168

41694169
if self.speculative_config and self.speculative_config.use_eagle():
41704170
assert isinstance(self.drafter, EagleProposer)
4171+
# Eagle currently only supports PIECEWISE cudagraphs.
4172+
# Therefore, while capturing, only use cudagraphs if the main model uses PIECEWISE; outside of capture, use them for any mode other than NONE.
4173+
# NOTE(lucas): this is a hack, need to clean up.
41714174
use_cudagraphs = (
4172-
cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
4173-
and not self.speculative_config.enforce_eager
4174-
)
4175+
(
4176+
is_graph_capturing
4177+
and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
4178+
)
4179+
or (
4180+
not is_graph_capturing
4181+
and cudagraph_runtime_mode != CUDAGraphMode.NONE
4182+
)
4183+
) and not self.speculative_config.enforce_eager
41754184

41764185
# Note(gnovack) - We need to disable cudagraphs for one of the two
41774186
# lora cases when cudagraph_specialize_lora is enabled. This is a

0 commit comments

Comments
 (0)