66import queue
77import sys
88import weakref
9+ from pathlib import Path
910from typing import (
1011 Any ,
1112 AsyncGenerator ,
2122
2223from tvm .runtime import Device
2324
25+ from mlc_llm .lora import upload_lora
2426from mlc_llm .protocol import debug_protocol , openai_api_protocol
2527from mlc_llm .protocol .generation_config import GenerationConfig
2628from mlc_llm .serve import data , engine_utils
@@ -903,8 +905,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
903905 )
904906 self .chat = AsyncChat (weakref .ref (self ))
905907 self .completions = AsyncCompletion (weakref .ref (self ))
906- < << << << Updated upstream
907- == == == =
908908 # Upload LoRA adapters – two modes:
909909 # 1. Separate artifacts recorded in metadata (preferred).
910910 # 2. Explicit list from engine_config (legacy / tests).
@@ -921,7 +921,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
921921 else :
922922 for d in getattr (engine_config , "lora_dirs" , []):
923923 upload_lora (d , device = self .device )
924- >> >> >> > Stashed changes
925924
926925 async def abort (self , request_id : str ) -> None :
927926 """Generation abortion interface.
@@ -1493,8 +1492,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
14931492 )
14941493 self .chat = Chat (weakref .ref (self ))
14951494 self .completions = Completion (weakref .ref (self ))
1496- < << << << Updated upstream
1497- == == == =
14981495 # Upload LoRA adapters – two modes:
14991496 # 1. Separate artifacts recorded in metadata (preferred).
15001497 # 2. Explicit list from engine_config (legacy / tests).
@@ -1511,7 +1508,6 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
15111508 else :
15121509 for d in getattr (engine_config , "lora_dirs" , []):
15131510 upload_lora (d , device = self .device )
1514- >> >> >> > Stashed changes
15151511
15161512 def abort (self , request_id : str ) -> None :
15171513 """Generation abortion interface.
0 commit comments