-from unittest.mock import MagicMock, patch
+# from unittest.mock import MagicMock, patch
 
-import pytest
+# import pytest
 
-from promptolution.llms import VLLM
+# from promptolution.llms import VLLM
 
-vllm = pytest.importorskip("vllm")
-transformers = pytest.importorskip("transformers")
+# # vllm = pytest.importorskip("vllm")
+# transformers = pytest.importorskip("transformers")
 
 
-@pytest.fixture
-def mock_vllm_dependencies():
-    """Set up comprehensive mocks for VLLM dependencies."""
-    # Mock the key components
-    with patch("vllm.LLM") as mock_llm_class, patch("vllm.SamplingParams") as mock_sampling_params, patch(
-        "transformers.AutoTokenizer"
-    ) as mock_tokenizer_class:
-        # Create and configure mock LLM
-        mock_llm = MagicMock()
-        mock_llm_class.return_value = mock_llm
+# @pytest.fixture
+# def mock_vllm_dependencies():
+#     """Set up comprehensive mocks for VLLM dependencies."""
+#     # Mock the key components
+#     with patch("vllm.LLM") as mock_llm_class, patch("vllm.SamplingParams") as mock_sampling_params, patch(
+#         "transformers.AutoTokenizer"
+#     ) as mock_tokenizer_class:
+#         # Create and configure mock LLM
+#         mock_llm = MagicMock()
+#         mock_llm_class.return_value = mock_llm
 
-        # Configure LLM engine with cache config for batch size calculation
-        mock_cache_config = MagicMock()
-        mock_cache_config.num_gpu_blocks = 100
-        mock_cache_config.block_size = 16
+#         # Configure LLM engine with cache config for batch size calculation
+#         mock_cache_config = MagicMock()
+#         mock_cache_config.num_gpu_blocks = 100
+#         mock_cache_config.block_size = 16
 
-        mock_executor = MagicMock()
-        mock_executor.cache_config = mock_cache_config
+#         mock_executor = MagicMock()
+#         mock_executor.cache_config = mock_cache_config
 
-        mock_engine = MagicMock()
-        mock_engine.model_executor = mock_executor
+#         mock_engine = MagicMock()
+#         mock_engine.model_executor = mock_executor
 
-        mock_llm.llm_engine = mock_engine
+#         mock_llm.llm_engine = mock_engine
 
-        # Set up the generate method to return appropriate number of responses
-        def mock_generate_side_effect(prompts_list, *args, **kwargs):
-            """Return one output per input prompt"""
-            return [
-                MagicMock(outputs=[MagicMock(text=f"Mocked response for prompt {i}")])
-                for i, _ in enumerate(prompts_list)
-            ]
+#         # Set up the generate method to return appropriate number of responses
+#         def mock_generate_side_effect(prompts_list, *args, **kwargs):
+#             """Return one output per input prompt"""
+#             return [
+#                 MagicMock(outputs=[MagicMock(text=f"Mocked response for prompt {i}")])
+#                 for i, _ in enumerate(prompts_list)
+#             ]
 
-        # Use side_effect instead of return_value for dynamic behavior
-        mock_llm.generate.side_effect = mock_generate_side_effect
+#         # Use side_effect instead of return_value for dynamic behavior
+#         mock_llm.generate.side_effect = mock_generate_side_effect
 
-        # Configure mock tokenizer
-        mock_tokenizer = MagicMock()
-        mock_tokenizer.encode.return_value = [1, 2, 3, 4, 5]
-        mock_tokenizer.apply_chat_template.return_value = "<mocked_chat_template>"
-        mock_tokenizer_class.from_pretrained.return_value = mock_tokenizer
+#         # Configure mock tokenizer
+#         mock_tokenizer = MagicMock()
+#         mock_tokenizer.encode.return_value = [1, 2, 3, 4, 5]
+#         mock_tokenizer.apply_chat_template.return_value = "<mocked_chat_template>"
+#         mock_tokenizer_class.from_pretrained.return_value = mock_tokenizer
 
-        yield {
-            "llm_class": mock_llm_class,
-            "llm": mock_llm,
-            "tokenizer_class": mock_tokenizer_class,
-            "tokenizer": mock_tokenizer,
-            "sampling_params": mock_sampling_params,
-        }
+#         yield {
+#             "llm_class": mock_llm_class,
+#             "llm": mock_llm,
+#             "tokenizer_class": mock_tokenizer_class,
+#             "tokenizer": mock_tokenizer,
+#             "sampling_params": mock_sampling_params,
+#         }
 
 
-def test_vllm_get_response(mock_vllm_dependencies):
-    """Test that VLLM._get_response works correctly with explicit batch_size."""
-    # Create VLLM instance with explicit batch_size to avoid calculation
-    vllm = VLLM(model_id="mock-model", batch_size=4)  # Set an explicit batch_size to avoid computation
+# def test_vllm_get_response(mock_vllm_dependencies):
+#     """Test that VLLM._get_response works correctly with explicit batch_size."""
+#     # Create VLLM instance with explicit batch_size to avoid calculation
+#     vllm = VLLM(model_id="mock-model", batch_size=4)  # Set an explicit batch_size to avoid computation
 
-    # Call get_response
-    prompts = ["Test prompt 1", "Test prompt 2"]
-    system_prompts = ["Be helpful", "Be concise"]
-    responses = vllm._get_response(prompts, system_prompts)
+#     # Call get_response
+#     prompts = ["Test prompt 1", "Test prompt 2"]
+#     system_prompts = ["Be helpful", "Be concise"]
+#     responses = vllm._get_response(prompts, system_prompts)
 
-    # Verify tokenizer was used correctly
-    assert mock_vllm_dependencies["tokenizer"].apply_chat_template.call_count == 2
+#     # Verify tokenizer was used correctly
+#     assert mock_vllm_dependencies["tokenizer"].apply_chat_template.call_count == 2
 
-    # Verify LLM generate was called
-    mock_vllm_dependencies["llm"].generate.assert_called_once()
+#     # Verify LLM generate was called
+#     mock_vllm_dependencies["llm"].generate.assert_called_once()
 
-    # Verify responses
-    assert len(responses) == 2
-    assert responses[0] == "Mocked response for prompt 0"
-    assert responses[1] == "Mocked response for prompt 1"
+#     # Verify responses
+#     assert len(responses) == 2
+#     assert responses[0] == "Mocked response for prompt 0"
+#     assert responses[1] == "Mocked response for prompt 1"
 
 
-def test_vllm_with_auto_batch_size(mock_vllm_dependencies):
-    """Test VLLM with automatic batch size calculation."""
-    # Create VLLM instance with batch_size=None to trigger auto calculation
-    vllm = VLLM(model_id="mock-model", batch_size=None, max_model_len=2048)
+# def test_vllm_with_auto_batch_size(mock_vllm_dependencies):
+#     """Test VLLM with automatic batch size calculation."""
+#     # Create VLLM instance with batch_size=None to trigger auto calculation
+#     vllm = VLLM(model_id="mock-model", batch_size=None, max_model_len=2048)
 
-    # Force a non-zero batch size
-    mock_vllm_dependencies["llm"].llm_engine.model_executor.cache_config.num_gpu_blocks = 1000
+#     # Force a non-zero batch size
+#     mock_vllm_dependencies["llm"].llm_engine.model_executor.cache_config.num_gpu_blocks = 1000
 
-    # Create a new instance to recalculate batch size
-    vllm = VLLM(model_id="mock-model", batch_size=None, max_model_len=2048)
+#     # Create a new instance to recalculate batch size
+#     vllm = VLLM(model_id="mock-model", batch_size=None, max_model_len=2048)
 
-    # Verify batch_size is greater than zero
-    assert vllm.batch_size > 0, "Batch size should be greater than zero"
+#     # Verify batch_size is greater than zero
+#     assert vllm.batch_size > 0, "Batch size should be greater than zero"
 
-    # Test with a single prompt
-    prompts = ["Test prompt"]
-    system_prompts = ["Be helpful"]
-    responses = vllm._get_response(prompts, system_prompts)
+#     # Test with a single prompt
+#     prompts = ["Test prompt"]
+#     system_prompts = ["Be helpful"]
+#     responses = vllm._get_response(prompts, system_prompts)
 
-    # Verify we get exactly one response for one prompt
-    assert len(responses) == 1
-    assert responses[0] == "Mocked response for prompt 0"
+#     # Verify we get exactly one response for one prompt
+#     assert len(responses) == 1
+#     assert responses[0] == "Mocked response for prompt 0"
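Note: the diff above disables the module by prefixing every line with "#". As a side note on the design choice, a module-level skip achieves the same effect while keeping the file importable, lint-checked, and easy to re-enable. The sketch below is only an alternative under that assumption, not part of this commit; the skip reason string is hypothetical.

import pytest

# Skip every test in this module at collection time instead of commenting the
# code out. The reason shown here is a placeholder, not the commit's wording.
pytestmark = pytest.mark.skip(reason="VLLM mock-based tests temporarily disabled")

# Alternatively, keep the tests active and let pytest skip them automatically
# whenever the optional dependencies are missing, as the original module did:
# vllm = pytest.importorskip("vllm")
# transformers = pytest.importorskip("transformers")

With the module-level mark, the disabled tests still appear as "skipped" in the test report rather than disappearing from collection entirely.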