From a6da068cd7cc2f940b5b3b6778f437f4976181ee Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Thu, 11 Dec 2025 05:35:19 +0000
Subject: [PATCH 01/10] Add convert_autoawq.py script.

Signed-off-by: Muti Chung
---
 .../modifiers/awq/convert_autoawq.py | 288 ++++++++++++++++++
 1 file changed, 288 insertions(+)
 create mode 100644 src/llmcompressor/modifiers/awq/convert_autoawq.py

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
new file mode 100644
index 000000000..5d2716001
--- /dev/null
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -0,0 +1,288 @@
+import glob
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any, Literal, cast
+
+import torch
+import transformers
+from auto_round.export.export_to_awq.utils import (
+    reverse_awq_order,
+    unpack_awq,
+)
+from compressed_tensors import ModelCompressor
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationConfig,
+    QuantizationScheme,
+    QuantizationStatus,
+    QuantizationStrategy,
+    QuantizationType,
+)
+from huggingface_hub import load_state_dict_from_file, snapshot_download
+
+
+def is_autoawq_model(model_path: Path) -> bool:
+    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+    if not hasattr(config, "quantization_config"):
+        return False
+
+    quantization_config = cast(dict[str, Any], config.quantization_config)
+    return quantization_config.get("quant_method") == "awq"
+
+
+def resolve_model_path(model_name_or_path: str) -> Path:
+    """Locate the model path.
+
+    If the input is a repository ID, download the model from the Hugging Face Hub and
+    return the path to the local directory.
+    """
+    if os.path.isdir(model_name_or_path):
+        return Path(model_name_or_path)
+
+    return Path(snapshot_download(model_name_or_path))
+
+
+def load_state_dict_from_model_dir(model_path: Path) -> dict[str, torch.Tensor]:
+    weight_files = glob.glob(str(model_path / "*.safetensors"))
+    if not weight_files:
+        weight_files = glob.glob(str(model_path / "*.bin"))
+
+    state_dict = {}
+    for weight_file in weight_files:
+        state_dict.update(
+            load_state_dict_from_file(
+                weight_file, map_location="cpu", weights_only=True
+            )
+        )
+    return state_dict
+
+
+def dequantize_gemm(
+    state_dict: dict[str, torch.Tensor], prefix: str, autoawq_config: dict[str, Any]
+) -> None:
+    num_bits = cast(int, autoawq_config.get("bits"))
+    group_size = cast(int, autoawq_config.get("group_size"))
+
+    qweight = state_dict.pop(f"{prefix}.qweight")
+    scales = state_dict.pop(f"{prefix}.scales")
+    qzeros = state_dict.pop(f"{prefix}.qzeros")
+
+    def dequantize_gemm_original(
+        qweight: torch.Tensor,
+        qzeros: torch.Tensor,
+        scales: torch.Tensor,
+        bits: int,
+        group_size: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Modified from auto_round.export.export_to_awq.utils.dequantize_gemm."""
+        # Unpack the qweight and qzeros tensors
+        iweight, izeros = unpack_awq(qweight, qzeros, bits)
+        # Reverse the order of the iweight and izeros tensors
+        iweight, izeros = reverse_awq_order(iweight, izeros, bits)
+
+        # overflow checks
+        iweight = torch.bitwise_and(iweight, (2**bits) - 1)
+        izeros = torch.bitwise_and(izeros, (2**bits) - 1)
+
+        # fp16 weights
+        scales_interleaved = scales.repeat_interleave(group_size, dim=0)
+        izeros_interleaved = izeros.repeat_interleave(group_size, dim=0)
+        fweight = (iweight - izeros_interleaved) * scales_interleaved
+
+        return fweight, izeros
+
+    weight, zero_point = dequantize_gemm_original(
+        qweight, qzeros, scales, num_bits, group_size
+    )
+
+    # AutoAWQ uses [0, 2^bits - 1], e.g., [0, 15], for quantized weights, but
+    # compressed-tensors uses [-2^(bits - 1), 2^(bits - 1) - 1], e.g., [-8, 7].
+    # Therefore, we need to shift the zero point by 2^(bits - 1) to match the range
+    # of compressed-tensors and to allow correct quant/dequantization.
+    shifted_zero_point = zero_point - 2 ** (num_bits - 1)
+
+    state_dict.update(
+        {
+            f"{prefix}.weight": weight.T,
+            f"{prefix}.weight_scale": scales.T,
+            f"{prefix}.weight_zero_point": shifted_zero_point.T,
+        }
+    )
+
+
+def dequantize_autoawq_state_dict(
+    state_dict: dict[str, torch.Tensor], autoawq_config: dict[str, Any]
+) -> dict[str, torch.Tensor]:
+    version = cast(str, autoawq_config.get("version"))
+
+    # TODO: maybe add support for other versions?
+    match version:
+        case "gemm":
+            dequantize_fn = dequantize_gemm
+        case _:
+            raise ValueError(f"Unsupported version: {version}")
+
+    keys = list(state_dict.keys())
+    for key in filter(lambda k: k.endswith("qweight"), keys):
+        prefix = key.removesuffix(".qweight")
+        dequantize_fn(state_dict, prefix, autoawq_config)
+
+    return state_dict
+
+
+def convert_and_save(
+    model_name_or_path: str,
+    output_dir: str,
+    quantization_format: str,
+    overwrite: bool = False,
+    trust_remote_code: bool = False,
+) -> None:
+    """Convert an AutoAWQ model to a compressed model and save it.
+
+    Steps:
+
+    1. Load the model weights directly.
+    2. Dequantize the weights accordingly.
+    3. Load the model with the dequantized weights.
+    4. Add the quantization parameters to the model.
+    5. Re-pack the weights using `ModelCompressor` with the correct configuration.
+    6. Save the model to the output directory.
+
+    :param model_name_or_path: Model ID on huggingface hub or path to local model.
+    :param output_dir: Path to save the converted model.
+    :param quantization_format: Compression format to be saved.
+    :param overwrite: Overwrite the existing output directory if it exists.
+    :param trust_remote_code: Whether to trust remote code.
+    """
+    if os.path.exists(output_dir) and not overwrite:
+        raise FileExistsError(
+            f"Output directory {output_dir} already exists. Set `overwrite=True` to"
+            " overwrite the existing directory."
+        )
+
+    model_path = resolve_model_path(model_name_or_path)
+    if not is_autoawq_model(model_path):
+        raise ValueError("Model is not an AutoAWQ model")
+
+    config = transformers.AutoConfig.from_pretrained(
+        model_path, trust_remote_code=trust_remote_code
+    )
+    autoawq_config = cast(dict[str, Any], config.quantization_config)
+    num_bits = cast(int, autoawq_config.get("bits"))
+    is_symmetric = not autoawq_config.get("zero_point")
+    group_size = cast(int, autoawq_config.get("group_size"))
+
+    # TODO: check syntax of modules_to_not_convert
+    ignore = autoawq_config.get("modules_to_not_convert")
+    if ignore is None:
+        ignore = ["lm_head"]
+
+    # 1. Load the model weights directly.
+    state_dict = load_state_dict_from_model_dir(model_path)
+
+    # 2. Dequantize the weights accordingly.
+    state_dict = dequantize_autoawq_state_dict(state_dict, autoawq_config)
+
+    # 3. Load the model with the dequantized weights.
+    del config.quantization_config  # remove to avoid loading with AutoAWQ.
+    with transformers.modeling_utils.no_init_weights():
+        model = transformers.AutoModelForCausalLM.from_config(
+            config, torch_dtype=torch.float16, trust_remote_code=trust_remote_code
+        )
+
+    model.load_state_dict(state_dict, strict=False)
+
+    # 4. Add the quantization parameters to the model.
+    quantization_scheme = QuantizationScheme(
+        targets=["Linear"],
+        weights=QuantizationArgs(
+            num_bits=num_bits,
+            type=QuantizationType.INT,
+            symmetric=is_symmetric,
+            group_size=group_size,
+            strategy=QuantizationStrategy.GROUP,
+        ),
+    )
+
+    for key in filter(lambda k: k.endswith("weight_zero_point"), state_dict.keys()):
+        module_name = key.removesuffix(".weight_zero_point")
+        setattr(
+            model.get_submodule(module_name), "quantization_scheme", quantization_scheme
+        )
+
+    quant_config = QuantizationConfig(
+        config_groups={"group_0": quantization_scheme},
+        quant_method="compressed-tensors",
+        quantization_status=QuantizationStatus.COMPRESSED,
+        format=quantization_format,
+        ignore=ignore,
+    )
+
+    # 5. Re-pack the weights using `ModelCompressor`.
+    compressor = ModelCompressor(quantization_config=quant_config)
+    compressed_state_dict = compressor.compress(model, state_dict, show_progress=True)
+
+    # 6. Save the model.
+    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)
+    model.save_pretrained(output_dir, state_dict=compressed_state_dict)
+    tokenizer.save_pretrained(output_dir)
+    compressor.update_config(output_dir)
+
+
+def load_and_convert_from_autoawq(
+    model_name_or_path: str,
+    quantization_format: str = "pack-quantized",
+    trust_remote_code: bool = False,
+) -> transformers.modeling_utils.PreTrainedModel:
+    """
+    Load an AutoAWQ checkpoint and convert it to a compressed model.
+
+    :param model_name_or_path: Model ID on huggingface hub or path to local model.
+    :param quantization_format: Compression format to be saved.
+    :param trust_remote_code: Whether to trust remote code.
+    :return: A compressed model.
+    """
+    with TemporaryDirectory() as temp_dir:
+        convert_and_save(model_name_or_path, temp_dir, quantization_format)
+        return transformers.AutoModelForCausalLM.from_pretrained(
+            temp_dir, torch_dtype=torch.float16, trust_remote_code=trust_remote_code
+        )
+
+
+@dataclass
+class ConversionArgs:
+    model_name_or_path: str = field(
+        metadata={"help": "Model ID on huggingface hub or path to local model."},
+    )
+    output_dir: str = field(
+        metadata={"help": "Path to save the converted model."},
+    )
+    quantization_format: Literal["naive-quantized", "packed-quantized"] = field(
+        default="naive-quantized",
+        metadata={"help": "Compression format to be saved."},
+    )  # TODO: switch default to packed-quantized once supported by llm-compressor.
+    overwrite: bool = field(
+        default=False,
+        metadata={"help": "Overwrite the existing output directory if it exists."},
+    )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={"help": "Whether to trust remote code."},
+    )
+
+
+__all__ = ["convert_and_save", "load_and_convert_from_autoawq"]
+
+
+if __name__ == "__main__":
+    parser = transformers.HfArgumentParser(ConversionArgs)
+    args = parser.parse_args_into_dataclasses()[0]
+    convert_and_save(
+        args.model_name_or_path,
+        args.output_dir,
+        args.quantization_format,
+        args.overwrite,
+        args.trust_remote_code,
+    )

From e281dfaf6fd6f3a9abc75b01fb3b3c20e3cf68e2 Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Thu, 11 Dec 2025 16:49:33 +0800
Subject: [PATCH 02/10] Remove hardcoded trust_remote_code.

Signed-off-by: Muti Chung
---
 src/llmcompressor/modifiers/awq/convert_autoawq.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 5d2716001..de88ab682 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -23,8 +23,8 @@ from huggingface_hub import load_state_dict_from_file, snapshot_download
 
 
-def is_autoawq_model(model_path: Path) -> bool:
-    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+def is_autoawq_model(model_path: Path, trust_remote_code: bool = False) -> bool:
+    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
     if not hasattr(config, "quantization_config"):
         return False
 
     quantization_config = cast(dict[str, Any], config.quantization_config)
     return quantization_config.get("quant_method") == "awq"
@@ -163,7 +163,7 @@ def convert_and_save(
         )
 
     model_path = resolve_model_path(model_name_or_path)
-    if not is_autoawq_model(model_path):
+    if not is_autoawq_model(model_path, trust_remote_code):
         raise ValueError("Model is not an AutoAWQ model")
 
     config = transformers.AutoConfig.from_pretrained(

From 23a9d736f56ee4aa20465dd59e087e18bac17763 Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Thu, 11 Dec 2025 16:51:38 +0800
Subject: [PATCH 03/10] Align default format to naive.

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Muti Chung
---
 src/llmcompressor/modifiers/awq/convert_autoawq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index de88ab682..5a24ce08d 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -233,7 +233,7 @@ def convert_and_save(
 
 def load_and_convert_from_autoawq(
     model_name_or_path: str,
-    quantization_format: str = "pack-quantized",
+    quantization_format: str = "naive-quantized",
     trust_remote_code: bool = False,
 ) -> transformers.modeling_utils.PreTrainedModel:
     """

From 2d11f50ddf9ac0fd86b54fc6c0792a9b75404211 Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Fri, 12 Dec 2025 10:17:43 +0800
Subject: [PATCH 04/10] Document-wise improvements

- Add usage example in module docstring.
- Modified what to show on document page.

Signed-off-by: Muti Chung
---
 .../modifiers/awq/convert_autoawq.py | 47 +++++++++++++++----
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 5a24ce08d..23b7b34c1 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -1,3 +1,33 @@
+"""
+Convert AutoAWQ models to llmcompressor-compatible models.
+
+This module offers the functionality to convert models quantized with AutoAWQ into
+compressed models in llmcompressor's format, which can then be served with vLLM.
+This module can be used as a CLI tool or as a Python API.
+
+## CLI Usage
+
+```sh
+python -m llmcompressor.modifiers.awq.convert_autoawq \
+    --model-name-or-path /path/to/model \
+    --output-dir /path/to/compressed/model \
+    --quantization-format naive-quantized
+```
+
+For more information, run `python -m llmcompressor.modifiers.awq.convert_autoawq --help`
+or refer to the `ConversionArgs` dataclass below.
+
+## Python API Usage
+
+```python
+from llmcompressor.modifiers.awq.convert_autoawq import load_and_convert_from_autoawq
+
+awq_model_path = "/path/to/model"  # can also be model_id on huggingface hub
+model = load_and_convert_from_autoawq(awq_model_path)
+model.generate(...)  # the converted model is now ready to be used.
+```
+"""
+
 import glob
 import os
 from dataclasses import dataclass, field
@@ -24,7 +54,9 @@ from huggingface_hub import load_state_dict_from_file, snapshot_download
 
 def is_autoawq_model(model_path: Path, trust_remote_code: bool = False) -> bool:
-    config = transformers.AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
+    config = transformers.AutoConfig.from_pretrained(
+        model_path, trust_remote_code=trust_remote_code
+    )
     if not hasattr(config, "quantization_config"):
         return False
 
     quantization_config = cast(dict[str, Any], config.quantization_config)
@@ -33,15 +65,12 @@
 def resolve_model_path(model_name_or_path: str) -> Path:
-    """Locate the model path.
-
-    If the input is a repository ID, download the model from the Hugging Face Hub and
-    return the path to the local directory.
-    """
     if os.path.isdir(model_name_or_path):
         return Path(model_name_or_path)
-
-    return Path(snapshot_download(model_name_or_path))
+    else:
+        # If the input is a model ID, download the model from the Hugging Face Hub and
+        # return the path to the local directory.
+        return Path(snapshot_download(model_name_or_path))
 
 
 def load_state_dict_from_model_dir(model_path: Path) -> dict[str, torch.Tensor]:
@@ -273,7 +302,7 @@ class ConversionArgs:
     )
 
 
-__all__ = ["convert_and_save", "load_and_convert_from_autoawq"]
+__all__ = ["convert_and_save", "load_and_convert_from_autoawq", "ConversionArgs"]
 
 
 if __name__ == "__main__":

From a147a51509387bdaea2efd4b7a610c62bb6b5c8e Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Fri, 12 Dec 2025 10:21:11 +0800
Subject: [PATCH 05/10] Add missing `trust_remote_code`s.

Signed-off-by: Muti Chung
---
 src/llmcompressor/modifiers/awq/convert_autoawq.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 23b7b34c1..94df39dd6 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -254,7 +254,9 @@ def convert_and_save(
     compressed_state_dict = compressor.compress(model, state_dict, show_progress=True)
 
     # 6. Save the model.
-    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)
+    tokenizer = transformers.AutoTokenizer.from_pretrained(
+        model_name_or_path, trust_remote_code=trust_remote_code
+    )
     model.save_pretrained(output_dir, state_dict=compressed_state_dict)
     tokenizer.save_pretrained(output_dir)
     compressor.update_config(output_dir)
@@ -274,7 +276,12 @@ def load_and_convert_from_autoawq(
     :return: A compressed model.
     """
     with TemporaryDirectory() as temp_dir:
-        convert_and_save(model_name_or_path, temp_dir, quantization_format)
+        convert_and_save(
+            model_name_or_path,
+            temp_dir,
+            quantization_format,
+            trust_remote_code=trust_remote_code,
+        )
         return transformers.AutoModelForCausalLM.from_pretrained(
             temp_dir, torch_dtype=torch.float16, trust_remote_code=trust_remote_code
         )

From 717d6c8eeac74891cc20ce3c1afd6eb96b17f5c3 Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Fri, 12 Dec 2025 17:07:51 +0800
Subject: [PATCH 06/10] Add conversion between module_to_not_convert and ignore.

Signed-off-by: Muti Chung
---
 .../modifiers/awq/convert_autoawq.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 94df39dd6..1f58a36a4 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -203,10 +203,20 @@ def convert_and_save(
     is_symmetric = not autoawq_config.get("zero_point")
     group_size = cast(int, autoawq_config.get("group_size"))
 
-    # TODO: check syntax of modules_to_not_convert
-    ignore = autoawq_config.get("modules_to_not_convert")
-    if ignore is None:
-        ignore = ["lm_head"]
+    # Convert AutoAWQ's substring-based ignore list to llm-compressor's regex format
+    # Usage in AutoAWQ:
+    # ```python
+    # if any(key in name for key in modules_to_not_convert): ...
+    # ```
+    # See https://github.com/casper-hansen/AutoAWQ/blob/88e4c76b20755db275574e6a03c83c84ba3bece5/awq/utils/module.py#L62
+    modules_to_not_convert = autoawq_config.get("modules_to_not_convert", None)
+    ignore = []
+    if modules_to_not_convert is not None:
+        # Convert each substring pattern to a regex pattern that matches it anywhere
+        for module in modules_to_not_convert:
+            ignore.append(f"re:.*{re.escape(module)}.*")
+
+    ignore.append("lm_head")  # AutoAWQ ignores lm_head by default
 
     # 1. Load the model weights directly.
     state_dict = load_state_dict_from_model_dir(model_path)

From f997a80ffcfc12c8ee75877fc5496c85601f9e72 Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Fri, 12 Dec 2025 09:22:58 +0000
Subject: [PATCH 07/10] Add accuracy tests.

Signed-off-by: Muti Chung
---
 .../modifiers/awq/test_convert_autoawq.py | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 tests/llmcompressor/modifiers/awq/test_convert_autoawq.py

diff --git a/tests/llmcompressor/modifiers/awq/test_convert_autoawq.py b/tests/llmcompressor/modifiers/awq/test_convert_autoawq.py
new file mode 100644
index 000000000..7a9723918
--- /dev/null
+++ b/tests/llmcompressor/modifiers/awq/test_convert_autoawq.py
@@ -0,0 +1,56 @@
+from tempfile import TemporaryDirectory
+
+from lm_eval.evaluator import simple_evaluate
+
+from llmcompressor.modifiers.awq.convert_autoawq import convert_and_save
+from tests.testing_utils import requires_gpu
+
+
+def run_lm_eval(model_name_or_path: str):
+    results = simple_evaluate(
+        model="hf",
+        model_args=f"pretrained={model_name_or_path},dtype=float16",
+        tasks=["arc_challenge", "arc_easy"],
+        num_fewshot=5,
+        batch_size=16,
+    )
+
+    return results
+
+
+def compare_models(model_name_or_path: str):
+    autoawq_result = run_lm_eval(model_name_or_path)
+    with TemporaryDirectory() as converted_model_dir:
+        convert_and_save(model_name_or_path, converted_model_dir, "naive-quantized")
+        converted_result = run_lm_eval(converted_model_dir)
+
+    arc_c_autoawq = autoawq_result["results"]["arc_challenge"]["acc_norm,none"]
+    arc_c_converted = converted_result["results"]["arc_challenge"]["acc_norm,none"]
+    arc_e_autoawq = autoawq_result["results"]["arc_easy"]["acc_norm,none"]
+    arc_e_converted = converted_result["results"]["arc_easy"]["acc_norm,none"]
+
+    assert abs(arc_e_autoawq - arc_e_converted) < 1e-2, (
+        f"Arc Easy: autoawq={arc_e_autoawq} != converted={arc_e_converted}."
+    )
+    assert abs(arc_c_autoawq - arc_c_converted) < 1e-2, (
+        f"Arc Challenge: autoawq={arc_c_autoawq} != converted={arc_c_converted}."
+    )
+
+
+@requires_gpu
+def test_mistral():
+    compare_models(
+        "fbaldassarri/mistralai_Mistral-7B-Instruct-v0.3-autoawq-int4-gs128-asym"
+    )
+
+
+@requires_gpu
+def test_qwen():
+    compare_models(
+        "ruikangliu/DeepSeek-R1-Distill-Qwen-1.5B-quantized.awq-autoawq-w4g128"
+    )
+
+
+@requires_gpu
+def test_llama():
+    compare_models("AMead10/Llama-3.2-3B-Instruct-AWQ")

From 9085cc37b156aba0e9966f96ee0d6cf58b0030ed Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Fri, 12 Dec 2025 17:52:45 +0800
Subject: [PATCH 08/10] Check if output directory is empty before overwriting

Signed-off-by: Muti Chung
---
 src/llmcompressor/modifiers/awq/convert_autoawq.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 1f58a36a4..7aa9453fa 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -185,7 +185,10 @@ def convert_and_save(
     :param overwrite: Overwrite the existing output directory if it exists.
     :param trust_remote_code: Whether to trust remote code.
     """
-    if os.path.exists(output_dir) and not overwrite:
+    is_empty_dir = (
+        os.path.isdir(output_dir) and next(os.scandir(output_dir), None) is None
+    )
+    if not is_empty_dir and not overwrite:
         raise FileExistsError(
             f"Output directory {output_dir} already exists. Set `overwrite=True` to"
             " overwrite the existing directory."
From d10e9a17a6ab9a90bc535dce4bfac4d6c8fbf81b Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Fri, 12 Dec 2025 18:21:56 +0800
Subject: [PATCH 09/10] Add missing re import.

Signed-off-by: Muti Chung
---
 src/llmcompressor/modifiers/awq/convert_autoawq.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 7aa9453fa..21c1f7b80 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -30,6 +30,7 @@
 
 import glob
 import os
+import re
 from dataclasses import dataclass, field
 from pathlib import Path
 from tempfile import TemporaryDirectory

From 15c0ba280981116bfdf2eb917df01c938de5b1cf Mon Sep 17 00:00:00 2001
From: Muti Chung
Date: Mon, 15 Dec 2025 10:48:15 +0800
Subject: [PATCH 10/10] Minor bug fixes.

Signed-off-by: Muti Chung
---
 .../modifiers/awq/convert_autoawq.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/convert_autoawq.py b/src/llmcompressor/modifiers/awq/convert_autoawq.py
index 21c1f7b80..0ae65f134 100644
--- a/src/llmcompressor/modifiers/awq/convert_autoawq.py
+++ b/src/llmcompressor/modifiers/awq/convert_autoawq.py
@@ -186,12 +186,18 @@ def convert_and_save(
     :param overwrite: Overwrite the existing output directory if it exists.
     :param trust_remote_code: Whether to trust remote code.
     """
-    is_empty_dir = (
-        os.path.isdir(output_dir) and next(os.scandir(output_dir), None) is None
-    )
-    if not is_empty_dir and not overwrite:
+    output_exists = os.path.exists(output_dir)
+    is_directory = os.path.isdir(output_dir) if output_exists else False
+    is_empty_dir = False
+    if output_exists and is_directory:
+        is_empty_dir = not any(os.scandir(output_dir))
+
+    if not output_exists:
+        # Safe: output_dir does not exist
+        pass
+    elif not is_directory or (not is_empty_dir and not overwrite):
         raise FileExistsError(
-            f"Output directory {output_dir} already exists. Set `overwrite=True` to"
+            f"{output_dir=} already exists. Set `overwrite=True` to"
             " overwrite the existing directory."
@@ -309,7 +315,7 @@ class ConversionArgs:
     output_dir: str = field(
         metadata={"help": "Path to save the converted model."},
     )
-    quantization_format: Literal["naive-quantized", "packed-quantized"] = field(
+    quantization_format: Literal["naive-quantized", "pack-quantized"] = field(
         default="naive-quantized",
         metadata={"help": "Compression format to be saved."},
     )  # TODO: switch default to packed-quantized once supported by llm-compressor.