Commit 5420a5e

fix(lora): adjust convert_weight and add lora_config helper
1 parent fc1edac commit 5420a5e

4 files changed: +115 -30 lines changed


python/mlc_llm/cli/convert_weight.py

Lines changed: 7 additions & 6 deletions
@@ -31,6 +31,12 @@ def _parse_output(path: Union[str, Path]) -> Path:
     path.mkdir(parents=True, exist_ok=True)
     return path
 
+def _parse_lora_adapter(path: Union[str, Path]) -> Path:
+    path = Path(path)
+    if not path.exists():
+        raise argparse.ArgumentTypeError(f"LoRA adapter path does not exist: {path}")
+    return path
+
 parser = ArgumentParser("MLC AutoLLM Quantization Framework")
 parser.add_argument(
     "config",
@@ -77,8 +83,7 @@ def _parse_output(path: Union[str, Path]) -> Path:
     required=True,
     help=HELP["output_quantize"] + " (required)",
 )
-<<<<<<< Updated upstream
-=======
+
 # Mutually exclusive LoRA options: merge vs separate
 lora_group = parser.add_mutually_exclusive_group()
 lora_group.add_argument(
@@ -99,7 +104,6 @@ def _parse_output(path: Union[str, Path]) -> Path:
     default=1.0,
     help="Scaling factor for LoRA when used with --lora-separate (default: %(default)s).",
 )
->>>>>>> Stashed changes
 
 parsed = parser.parse_args(argv)
 parsed.source, parsed.source_format = detect_weight(
@@ -116,10 +120,7 @@ def _parse_output(path: Union[str, Path]) -> Path:
     source=parsed.source,
     source_format=parsed.source_format,
     output=parsed.output,
-<<<<<<< Updated upstream
-=======
     lora_adapter=parsed.lora_adapter,
     lora_separate=parsed.lora_separate,
     lora_alpha=parsed.lora_alpha,
->>>>>>> Stashed changes
 )
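For context, a minimal standalone sketch of the CLI pattern this file now uses: two mutually exclusive LoRA flags plus a scaling factor, validated by the new _parse_lora_adapter helper. The full add_argument bodies are truncated in the hunks above, so the help strings and defaults for the two path flags below are assumptions, not the commit's exact text.

# Sketch only: mirrors the mutually exclusive LoRA options added above.
# Only the flag names, the helper, and --lora-alpha's 1.0 default appear in the diff;
# everything else here is assumed for illustration.
import argparse
from pathlib import Path
from typing import Union


def _parse_lora_adapter(path: Union[str, Path]) -> Path:
    path = Path(path)
    if not path.exists():
        raise argparse.ArgumentTypeError(f"LoRA adapter path does not exist: {path}")
    return path


parser = argparse.ArgumentParser("lora-options-demo")
lora_group = parser.add_mutually_exclusive_group()
lora_group.add_argument("--lora-adapter", type=_parse_lora_adapter, default=None,
                        help="Merge this LoRA adapter into the base weights (legacy mode).")
lora_group.add_argument("--lora-separate", type=_parse_lora_adapter, default=None,
                        help="Pack this LoRA adapter separately from the base weights.")
parser.add_argument("--lora-alpha", type=float, default=1.0,
                    help="Scaling factor for LoRA when used with --lora-separate.")

# Passing both path flags at once is rejected by argparse itself:
#   parser.parse_args(["--lora-adapter", "a/", "--lora-separate", "b/"])  # -> SystemExit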

python/mlc_llm/interface/convert_weight.py

Lines changed: 11 additions & 24 deletions
@@ -5,7 +5,7 @@
 import os
 from io import StringIO
 from pathlib import Path
-from typing import Any, Dict, Iterator, Tuple
+from typing import Any, Dict, Iterator, Optional, Tuple
 
 from tvm import tir
 from tvm.contrib import tvmjs
@@ -34,14 +34,11 @@ class ConversionArgs: # pylint: disable=too-many-instance-attributes
     source: Path
     source_format: str
     output: Path
-<<<<<<< Updated upstream
-=======
     # Legacy merge-mode
     lora_adapter: Optional[Path] = None
     # New separate-mode
     lora_separate: Optional[Path] = None
     lora_alpha: float = 1.0
->>>>>>> Stashed changes
 
     def display(self) -> None:
         """Display the arguments to stdout."""
@@ -58,20 +55,23 @@ def _device_to_str(device: Device) -> str:
         print(f" {bold('--source'):<25} {self.source}", file=out)
         print(f" {bold('--source-format'):<25} {self.source_format}", file=out)
         print(f" {bold('--output'):<25} {self.output}", file=out)
-<<<<<<< Updated upstream
-=======
         if self.lora_adapter:
             print(f" {bold('--lora-adapter'):<25} {self.lora_adapter}", file=out)
         if self.lora_separate:
             print(f" {bold('--lora-separate'):<25} {self.lora_separate}", file=out)
             print(f" {bold('--lora-alpha'):<25} {self.lora_alpha}", file=out)
->>>>>>> Stashed changes
         print(out.getvalue().rstrip())
 
 
+def _merge_lora_weights(args: ConversionArgs) -> Path:
+    """Merge LoRA weights into base model weights (legacy mode)."""
+    # TODO: Implement LoRA weight merging for legacy mode
+    # For now, just return the original source path
+    logger.warning("LoRA weight merging not yet implemented, using base weights only")
+    return args.source
+
+
 def _convert_args(args: ConversionArgs) -> None: # pylint: disable=too-many-locals
-<<<<<<< Updated upstream
-=======
     # ------------------------------------------------------------------
     # Handle LoRA: separate-pack or legacy merge
     # ------------------------------------------------------------------
@@ -93,7 +93,6 @@ def _convert_args(args: ConversionArgs) -> None: # pylint: disable=too-many-loc
     # legacy merge path (if provided)
     source_path = _merge_lora_weights(args) if args.lora_adapter else args.source
 
->>>>>>> Stashed changes
     pre_shards_num = os.getenv("MLC_INTERNAL_PRESHARD_NUM")
     # model config & quantization config
     model_config = args.model.config.from_file(args.config)
@@ -160,7 +159,7 @@ def _param_generator() -> Iterator[Tuple[str, NDArray]]:
         nonlocal total_params, total_bytes
         with Target.from_device(args.device), tqdm.redirect():
             loader = LOADER[args.source_format](
-                path=args.source,
+                path=source_path,
                 extern_param_map=args.model.source[args.source_format](
                     model_config, args.quantization
                 ),
@@ -175,13 +174,11 @@ def _param_generator() -> Iterator[Tuple[str, NDArray]]:
         total_params = loader.stats.total_param_num
 
     def _metadata_callback() -> Dict[str, Any]:
-        return {
+        metadata = {
             "ParamSize": len(param_names),
             "ParamBytes": total_bytes,
             "BitsPerParam": total_bytes * 8.0 / total_params,
         }
-<<<<<<< Updated upstream
-=======
         # Add LoRA metadata if adapter was used
         if args.lora_separate:
             metadata["LoRASeparate"] = True
@@ -191,7 +188,6 @@ def _metadata_callback() -> Dict[str, Any]:
             metadata["LoRAAdapter"] = str(args.lora_adapter)
             metadata["LoRAMerged"] = True
         return metadata
->>>>>>> Stashed changes
 
     # dump to output directory
     tvmjs.dump_ndarray_cache(
@@ -215,13 +211,10 @@ def _metadata_callback() -> Dict[str, Any]:
         green("Bits per parameter"),
         total_bytes * 8.0 / total_params,
     )
-<<<<<<< Updated upstream
-=======
     if args.lora_separate:
         logger.info("%s: %s", green("LoRA adapter packed from"), bold(str(args.lora_separate)))
     elif args.lora_adapter:
         logger.info("%s: %s", green("LoRA adapter merged from"), bold(str(args.lora_adapter)))
->>>>>>> Stashed changes
     logger.info("Saved to directory: %s", bold(str(args.output)))
 
 
@@ -233,11 +226,6 @@ def convert_weight( # pylint: disable=too-many-arguments
     source: Path,
     source_format: str,
     output: Path,
-<<<<<<< Updated upstream
-):
-    """MLC LLM's weight conversation and quantization flow."""
-    args = ConversionArgs(config, quantization, model, device, source, source_format, output)
-=======
     lora_adapter: Optional[Path] = None,
     lora_separate: Optional[Path] = None,
     lora_alpha: float = 1.0,
@@ -255,6 +243,5 @@ def convert_weight( # pylint: disable=too-many-arguments
         lora_separate,
         lora_alpha,
     )
->>>>>>> Stashed changes
     args.display()
     _convert_args(args)
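The _merge_lora_weights helper above is still a stub that logs a warning and returns the base path. For readers wondering what the legacy merge would eventually do, here is a hedged sketch of the standard LoRA fold-in, W' = W + (alpha / r) * (B @ A), written over plain NumPy arrays. The dict-of-arrays layout and the "<param>.lora_A" / "<param>.lora_B" key naming are assumptions for illustration; the commit does not define an on-disk adapter format.

# Hypothetical sketch, not the commit's implementation.
from typing import Dict

import numpy as np


def merge_lora_delta(
    base: Dict[str, np.ndarray],
    adapter: Dict[str, np.ndarray],
    lora_alpha: float,
    rank: int,
) -> Dict[str, np.ndarray]:
    """Return base weights with W + (lora_alpha / rank) * (B @ A) folded in per target."""
    scaling = lora_alpha / rank
    merged = dict(base)
    for name, weight in base.items():
        lora_a = adapter.get(f"{name}.lora_A")  # assumed key; shape (rank, in_features)
        lora_b = adapter.get(f"{name}.lora_B")  # assumed key; shape (out_features, rank)
        if lora_a is not None and lora_b is not None:
            merged[name] = weight + scaling * (lora_b @ lora_a)
    return merged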

python/mlc_llm/lora/__init__.py

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+"""LoRA (Low-Rank Adaptation) module for MLC LLM."""
+
+from .lora import upload_lora, set_lora, get_registered_lora_dirs
+from .lora_config import LoRAConfig
+
+__all__ = [
+    "upload_lora",
+    "set_lora",
+    "get_registered_lora_dirs",
+    "LoRAConfig",
+]

python/mlc_llm/lora/lora_config.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+"""LoRA configuration dataclass for MLC LLM."""
+
+from dataclasses import dataclass
+from typing import List, Optional
+
+
+@dataclass
+class LoRAConfig:
+    """Configuration for LoRA (Low-Rank Adaptation) parameters.
+
+    This configuration is used to define LoRA adaptation parameters
+    for fine-tuning large language models with low-rank matrices.
+
+    Parameters
+    ----------
+    r : int
+        LoRA rank (dimension of the low-rank matrices). Common values are 4, 8, 16, 32.
+        Higher values provide more capacity but increase parameters.
+
+    lora_alpha : float
+        LoRA scaling factor. Controls the magnitude of the LoRA adaptation.
+        Typically set to the same value as r or higher.
+
+    lora_dropout : float
+        Dropout probability for LoRA layers during training.
+        Set to 0.0 for inference.
+
+    target_modules : List[str]
+        List of module names to apply LoRA to.
+        Common targets: ["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"]
+
+    fan_in_fan_out : bool
+        Whether the layer uses fan_in_fan_out convention.
+        Set to True for Conv1D layers, False for Linear layers.
+
+    bias : str
+        Bias type for LoRA layers. Options: "none", "all", "lora_only"
+
+    task_type : Optional[str]
+        Task type for the LoRA adaptation (e.g., "CAUSAL_LM")
+
+    inference_mode : bool
+        Whether the model is in inference mode.
+
+    merge_weights : bool
+        Whether to merge LoRA weights into base weights during inference.
+    """
+
+    r: int = 8
+    lora_alpha: float = 16.0
+    lora_dropout: float = 0.1
+    target_modules: List[str] = None
+    fan_in_fan_out: bool = False
+    bias: str = "none"
+    task_type: Optional[str] = None
+    inference_mode: bool = False
+    merge_weights: bool = True
+
+    def __post_init__(self):
+        """Set default target modules if not provided."""
+        if self.target_modules is None:
+            self.target_modules = ["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"]
+
+    @property
+    def scaling(self) -> float:
+        """Return the scaling factor for LoRA: alpha / r."""
+        return self.lora_alpha / self.r
+
+    def to_dict(self) -> dict:
+        """Convert configuration to dictionary."""
+        return {
+            "r": self.r,
+            "lora_alpha": self.lora_alpha,
+            "lora_dropout": self.lora_dropout,
+            "target_modules": self.target_modules,
+            "fan_in_fan_out": self.fan_in_fan_out,
+            "bias": self.bias,
+            "task_type": self.task_type,
+            "inference_mode": self.inference_mode,
+            "merge_weights": self.merge_weights,
+        }
+
+    @classmethod
+    def from_dict(cls, config_dict: dict) -> "LoRAConfig":
+        """Create configuration from dictionary."""
+        return cls(**config_dict)
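A quick usage sketch of the new dataclass, imported through the mlc_llm.lora package added in this commit; the field values are illustrative. Note that target_modules is declared with a None default and filled in by __post_init__, so it can safely be omitted at construction time.

# Illustrative round-trip through LoRAConfig; values are examples.
from mlc_llm.lora import LoRAConfig

config = LoRAConfig(r=16, lora_alpha=32.0, lora_dropout=0.0, inference_mode=True)
assert config.scaling == 2.0           # scaling = lora_alpha / r
assert config.target_modules           # defaults filled by __post_init__

restored = LoRAConfig.from_dict(config.to_dict())  # serialize, then rebuild
assert restored == config              # dataclass equality compares all fields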
