Commit 0b6a8a3

[BugFix] Fix non detected failing tests (#30277)
Signed-off-by: ilmarkov <markovilya197@gmail.com>
Parent: 804e346

6 files changed: +77 -38 lines

.buildkite/test-pipeline.yaml

Lines changed: 6 additions & 2 deletions
@@ -468,7 +468,9 @@ steps:
     # tests covered elsewhere.
     # Use `find` to launch multiple instances of pytest so that
     # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
-    - "find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \\;"
+    # However, find does not normally propagate error codes, so we combine it with xargs
+    # (using -0 for proper path handling)
+    - "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 
 - label: PyTorch Fullgraph Smoke Test # 15min
   timeout_in_minutes: 30
@@ -482,7 +484,9 @@ steps:
     # as it is a heavy test that is covered in other steps.
     # Use `find` to launch multiple instances of pytest so that
     # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
-    - "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\;"
+    # However, find does not normally propagate error codes, so we combine it with xargs
+    # (using -0 for proper path handling)
+    - "find compile/fullgraph -maxdepth 1 -name 'test_*.py' -not -name 'test_full_graph.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 
 - label: PyTorch Fullgraph Test # 27min
   timeout_in_minutes: 40
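
Why the switch matters: `find ... -exec cmd \;` exits 0 even when `cmd` fails, so a failing pytest invocation never failed the CI step, whereas `xargs` exits 123 when any invocation fails. A stand-alone demonstration of the difference (a hypothetical snippet, not part of this commit):

    import subprocess

    # `find -exec` swallows the exit status of the command it runs ...
    exec_rc = subprocess.run(
        ["bash", "-c", "find . -maxdepth 0 -exec false {} \\;"]
    ).returncode

    # ... while `xargs` reports failure of any invocation via exit code 123.
    xargs_rc = subprocess.run(
        ["bash", "-c", "find . -maxdepth 0 -print0 | xargs -0 -n1 false"]
    ).returncode

    print(exec_rc, xargs_rc)  # expected output: 0 123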

tests/compile/fullgraph/test_multimodal_compile.py

Lines changed: 0 additions & 1 deletion
@@ -17,7 +17,6 @@ def test_compile():
 # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
 @pytest.mark.forked
 @pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
-@pytest.mark.xfail
 def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
     """Test that Qwen2.5-VL vision submodules are compiled.
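
Removing @pytest.mark.xfail here is the core of the fix: a non-strict xfail reports a failing test as "xfailed" (and an unexpectedly passing one as "xpassed") without failing the suite, which is exactly how this regression went undetected. A hypothetical illustration of that masking behavior:

    import pytest

    @pytest.mark.xfail
    def test_masked_failure():
        assert False  # reported as "xfailed"; the suite still passes

    @pytest.mark.xfail
    def test_masked_pass():
        assert True  # reported as "xpassed"; still not an error

    # With strict=True, an unexpected pass would fail the suite instead:
    @pytest.mark.xfail(strict=True)
    def test_strict():
        assert False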

tests/compile/test_compile_ranges.py

Lines changed: 6 additions & 0 deletions
@@ -80,6 +80,8 @@ def test_compile_ranges(use_fresh_inductor_cache):
     vllm_config = VllmConfig(
         scheduler_config=SchedulerConfig(
             max_num_batched_tokens=8192,
+            max_model_len=8192,
+            is_encoder_decoder=False,
         ),
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
@@ -112,6 +114,8 @@ def test_compile_config_get_compile_ranges():
     VllmConfig(
         scheduler_config=SchedulerConfig(
             max_num_batched_tokens=8192,
+            max_model_len=8192,
+            is_encoder_decoder=False,
         ),
         compilation_config=compilation_config,
     )
@@ -134,6 +138,8 @@ def test_inductor_cache_compile_ranges(monkeypatch, use_fresh_inductor_cache):
     )
     scheduler_config = SchedulerConfig(
         max_num_batched_tokens=8192,
+        max_model_len=8192,
+        is_encoder_decoder=False,
     )
     torch.set_default_device("cuda")
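
All three hunks make the same change: SchedulerConfig is now constructed with max_model_len and is_encoder_decoder passed explicitly. For reference, a stand-alone version of the updated construction (import paths assumed from the test file's context; defaults may differ across vLLM versions):

    from vllm.config import (
        CompilationConfig,
        CompilationMode,
        SchedulerConfig,
        VllmConfig,
    )

    vllm_config = VllmConfig(
        scheduler_config=SchedulerConfig(
            max_num_batched_tokens=8192,
            max_model_len=8192,        # now set explicitly
            is_encoder_decoder=False,  # now set explicitly
        ),
        compilation_config=CompilationConfig(mode=CompilationMode.VLLM_COMPILE),
    )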

tests/compile/test_pass_manager.py

Lines changed: 40 additions & 33 deletions
@@ -5,9 +5,14 @@
 import pytest
 import torch
 
-from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
+from vllm.compilation.inductor_pass import (
+    CallableInductorPass,
+    InductorPass,
+    pass_context,
+)
 from vllm.compilation.pass_manager import PostGradPassManager
 from vllm.config import ModelConfig, VllmConfig
+from vllm.config.utils import Range
 
 
 # dummy custom pass that doesn't inherit
@@ -42,35 +47,37 @@ def __call__(self, graph: torch.fx.graph.Graph) -> None:
     ],
 )
 def test_pass_manager_uuid(callable):
-    # Some passes need dtype to be set
-    config = VllmConfig(model_config=ModelConfig(dtype=torch.bfloat16))
-
-    pass_manager = PostGradPassManager()
-    pass_manager.configure(config)
-
-    # Check that UUID is different if the same pass is added 2x
-    pass_manager.add(callable)
-    uuid1 = pass_manager.uuid()
-    pass_manager.add(callable)
-    uuid2 = pass_manager.uuid()
-    assert uuid1 != uuid2
-
-    # UUID should be the same as the original one,
-    # as we constructed in the same way.
-    pass_manager2 = PostGradPassManager()
-    pass_manager2.configure(config)
-    pass_manager2.add(callable)
-    assert uuid1 == pass_manager2.uuid()
-
-    # UUID should be different due to config change
-    config2 = copy.deepcopy(config)
-    config2.compilation_config.pass_config.fuse_norm_quant = (
-        not config2.compilation_config.pass_config.fuse_norm_quant
-    )
-    config2.compilation_config.pass_config.fuse_act_quant = (
-        not config2.compilation_config.pass_config.fuse_act_quant
-    )
-    pass_manager3 = PostGradPassManager()
-    pass_manager3.configure(config2)
-    pass_manager3.add(callable)
-    assert uuid1 != pass_manager3.uuid()
+    # Set the pass context as PassManager uuid uses it
+    with pass_context(Range(start=1, end=8)):
+        # Some passes need dtype to be set
+        config = VllmConfig(model_config=ModelConfig(dtype=torch.bfloat16))
+
+        pass_manager = PostGradPassManager()
+        pass_manager.configure(config)
+
+        # Check that UUID is different if the same pass is added 2x
+        pass_manager.add(callable)
+        uuid1 = pass_manager.uuid()
+        pass_manager.add(callable)
+        uuid2 = pass_manager.uuid()
+        assert uuid1 != uuid2
+
+        # UUID should be the same as the original one,
+        # as we constructed in the same way.
+        pass_manager2 = PostGradPassManager()
+        pass_manager2.configure(config)
+        pass_manager2.add(callable)
+        assert uuid1 == pass_manager2.uuid()
+
+        # UUID should be different due to config change
+        config2 = copy.deepcopy(config)
+        config2.compilation_config.pass_config.fuse_norm_quant = (
+            not config2.compilation_config.pass_config.fuse_norm_quant
+        )
+        config2.compilation_config.pass_config.fuse_act_quant = (
+            not config2.compilation_config.pass_config.fuse_act_quant
+        )
+        pass_manager3 = PostGradPassManager()
+        pass_manager3.configure(config2)
+        pass_manager3.add(callable)
+        assert uuid1 != pass_manager3.uuid()
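
The test now wraps its body in pass_context(...) because PostGradPassManager.uuid() reads the active compile range from that context. A minimal sketch of how such a context manager can be structured (an assumed shape for illustration; vLLM's actual implementation lives in vllm/compilation/inductor_pass.py and may differ):

    from contextlib import contextmanager

    _current_pass_context = None  # module-level state read while hashing passes

    @contextmanager
    def pass_context(compile_range):
        """Temporarily expose the active compile range to uuid() computations."""
        global _current_pass_context
        prev, _current_pass_context = _current_pass_context, compile_range
        try:
            yield
        finally:
            _current_pass_context = prev  # restore even if the body raises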

vllm/compilation/inductor_pass.py

Lines changed: 6 additions & 2 deletions
@@ -1,22 +1,26 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from __future__ import annotations
+
 import functools
 import hashlib
 import inspect
 import json
 import types
 from collections.abc import Callable
 from contextlib import contextmanager
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
 from torch import fx
 from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily
 
-from vllm.config.utils import Range
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
+if TYPE_CHECKING:
+    from vllm.config.utils import Range
+
 if is_torch_equal_or_newer("2.6"):
     from torch._inductor.custom_graph_pass import CustomGraphPass
 else:
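
Guarding the Range import behind TYPE_CHECKING is the standard recipe for a name needed only in annotations: combined with `from __future__ import annotations`, annotations are evaluated lazily, so the guarded import never runs at runtime and any import cycle between the two modules is broken. A generic sketch of the pattern (module and type names hypothetical):

    from __future__ import annotations  # annotations become lazily-evaluated strings

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by static type checkers; never executed at runtime.
        from heavy_module import HeavyType  # hypothetical module

    def process(value: HeavyType) -> None:
        # At runtime the annotation is just the string "HeavyType".
        print(value)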

vllm/compilation/piecewise_backend.py

Lines changed: 19 additions & 0 deletions
@@ -53,8 +53,27 @@ def __init__(
         self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1
 
         self.is_full_graph = total_piecewise_compiles == 1
+        # TODO: we need to generalize encoder compilation to other models
+        self.is_encoder_compilation = vllm_backend.prefix in [
+            "Qwen2_5_VisionPatchEmbed",
+            "Qwen2_5_VisionPatchMerger",
+            "Qwen2_5_VisionBlock",
+        ]
 
         self.compile_ranges = self.compilation_config.get_compile_ranges()
+        if self.is_encoder_compilation:
+            # For encoder compilation we use the max int32 value
+            # to set the upper bound of the compile ranges
+            max_int32 = 2**31 - 1
+            last_compile_range = self.compile_ranges[-1]
+            assert (
+                last_compile_range.end
+                == vllm_config.scheduler_config.max_num_batched_tokens
+            )
+            self.compile_ranges[-1] = Range(
+                start=last_compile_range.start, end=max_int32
+            )
+
         log_string = f"PiecewiseBackend: compile_ranges: {self.compile_ranges}"
         logger.debug_once(log_string)
