Skip to content

Commit 56b5dfc

Browse files
committed
fix(lora): update serving engine for LoRA integration
1 parent df742ed commit 56b5dfc

File tree

1 file changed: +2 -6 lines changed

python/mlc_llm/serve/engine.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import queue
77
import sys
88
import weakref
9+
from pathlib import Path
910
from typing import (
1011
Any,
1112
AsyncGenerator,
@@ -21,6 +22,7 @@
2122

2223
from tvm.runtime import Device
2324

25+
from mlc_llm.lora import upload_lora
2426
from mlc_llm.protocol import debug_protocol, openai_api_protocol
2527
from mlc_llm.protocol.generation_config import GenerationConfig
2628
from mlc_llm.serve import data, engine_utils
@@ -903,8 +905,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
903905
)
904906
self.chat = AsyncChat(weakref.ref(self))
905907
self.completions = AsyncCompletion(weakref.ref(self))
906-
<<<<<<< Updated upstream
907-
=======
908908
# Upload LoRA adapters – two modes:
909909
# 1. Separate artifacts recorded in metadata (preferred).
910910
# 2. Explicit list from engine_config (legacy / tests).
@@ -921,7 +921,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
921921
else:
922922
for d in getattr(engine_config, "lora_dirs", []):
923923
upload_lora(d, device=self.device)
924-
>>>>>>> Stashed changes
925924

926925
async def abort(self, request_id: str) -> None:
927926
"""Generation abortion interface.
@@ -1493,8 +1492,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
14931492
)
14941493
self.chat = Chat(weakref.ref(self))
14951494
self.completions = Completion(weakref.ref(self))
1496-
<<<<<<< Updated upstream
1497-
=======
14981495
# Upload LoRA adapters – two modes:
14991496
# 1. Separate artifacts recorded in metadata (preferred).
15001497
# 2. Explicit list from engine_config (legacy / tests).
@@ -1511,7 +1508,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
15111508
else:
15121509
for d in getattr(engine_config, "lora_dirs", []):
15131510
upload_lora(d, device=self.device)
1514-
>>>>>>> Stashed changes
15151511

15161512
def abort(self, request_id: str) -> None:
15171513
"""Generation abortion interface.

0 commit comments

Comments
 (0)