4 changes: 2 additions & 2 deletions fms_mo/custom_ext_kernels/utils.py
@@ -517,8 +517,8 @@ def exllama_ops_load_and_reg(qcfg=None, run_unit_test=False):
         need_registration = False
     else:
         need_registration = (
-            available_packages["exllama_kernels"]
-            and available_packages["exllamav2_kernels"]
+            available_packages["gptqmodel_exllama_kernels"]
+            and available_packages["gptqmodel_exllamav2_kernels"]
         )

     if not need_registration:
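For context, the guard above relies on `available_packages` flagging the renamed `gptqmodel_*` kernel packages. A minimal sketch of how such an availability map could be built (an assumption for illustration, not the actual `fms_mo.utils.import_utils` code):

```python
# Hedged sketch: approximate how an available_packages map might be populated.
# find_spec() returns None when a top-level package cannot be imported.
from importlib.util import find_spec

_OPTIONAL_PACKAGES = (
    "gptqmodel",
    "gptqmodel_exllama_kernels",
    "gptqmodel_exllamav2_kernels",
)

available_packages = {name: find_spec(name) is not None for name in _OPTIONAL_PACKAGES}

# Mirrors the registration guard in the hunk above.
need_registration = (
    available_packages["gptqmodel_exllama_kernels"]
    and available_packages["gptqmodel_exllamav2_kernels"]
)
```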
22 changes: 15 additions & 7 deletions fms_mo/modules/linear.py
@@ -1583,7 +1583,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x.to(in_dtype)


-try:
+gptq_available = (
+    available_packages["gptqmodel"]
+    and available_packages["gptqmodel_exllama_kernels"]
+    and available_packages["gptqmodel_exllamav2_kernels"]
+)
+
+if gptq_available:
     # Third Party
     from gptqmodel.nn_modules.qlinear.exllama import (
         ExllamaQuantLinear as QLinearExllamaV1,
@@ -1882,12 +1888,6 @@ def forward(self, x, force_cuda=False):
             x.add_(self.bias)
             return x

-except ModuleNotFoundError:
-    logger.warning(
-        "GPTQModel is not properly installed. "
-        "QLinearExv1WI4AF16 and QLinearExv2WI4AF16 wrappers will not be available."
-    )
-

 class LinearFuncFPxFwdBwd(torch.autograd.Function):
     """Linear function using FP24 accumulation, experimental only.
@@ -2355,6 +2355,14 @@ def extra_repr(self) -> str:
 if available_packages["mx"]:
     QLinear_modules += (QLinearMX,)

+if gptq_available:
+    QLinear_modules += (
+        QLinearExllamaV1,
+        QLinearExllamaV2,
+        QLinearExv1WI4AF16,
+        QLinearExv2WI4AF16,
+    )
+

 def isinstance_qlinear(module):
     """
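Since the Exllama wrappers are now appended to `QLinear_modules` only when `gptq_available` is true, `isinstance_qlinear` keeps working regardless of which optional backends are installed. A self-contained sketch of the pattern (the stand-in tuple and toy model below are illustrative, not the repo's actual definitions):

```python
from torch import nn

# Stand-in for the tuple assembled in linear.py; in fms_mo it is extended
# with QLinearExllamaV1/V2 etc. only when the gptqmodel packages import.
QLinear_modules = (nn.Linear,)


def isinstance_qlinear(module):
    """Return True if `module` is one of the registered quantized-linear classes."""
    return isinstance(module, QLinear_modules)


model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 4))
num_matches = sum(isinstance_qlinear(m) for m in model.modules())
print(f"{num_matches} matching linear layers")  # -> 2
```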
77 changes: 39 additions & 38 deletions fms_mo/utils/custom_gptq_models.py
@@ -14,41 +14,42 @@

"""Allow users to add new GPTQ classes for their custom models easily."""

# Third Party
from gptqmodel.models.base import BaseGPTQModel


class GraniteGPTQForCausalLM(BaseGPTQModel):
"""Enable Granite for GPTQ."""

layer_type = "GraniteDecoderLayer"
layers_node = "model.layers"
base_modules = ["model.embed_tokens", "model.norm"]
layer_modules = [
["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
["self_attn.o_proj"],
["mlp.up_proj", "mlp.gate_proj"],
["mlp.down_proj"],
]


class GraniteMoeGPTQForCausalLM(BaseGPTQModel):
"""Enable Granite MOE for GPTQ."""

layer_type = "GraniteMoeDecoderLayer"
layers_node = "model.layers"
base_modules = ["model.embed_tokens", "model.norm"]
layer_modules = [
["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
["self_attn.o_proj"],
["block_sparse_moe.input_linear", "block_sparse_moe.output_linear"],
]


# NOTE: Keys in this table are huggingface config."model_type" (see the corresponding field in
# config.json). Make sure you cover the ones in the model family you want to use, as they may
# not be under the same model_type. See Granite as an example.
custom_gptq_classes = {
# "granite": GraniteGPTQForCausalLM,
"granitemoe": GraniteMoeGPTQForCausalLM,
}
# Local
from fms_mo.utils.import_utils import available_packages

if available_packages["gptqmodel"]:
# Third Party
from gptqmodel.models.base import BaseGPTQModel

class GraniteGPTQForCausalLM(BaseGPTQModel):
"""Enable Granite for GPTQ."""

layer_type = "GraniteDecoderLayer"
layers_node = "model.layers"
base_modules = ["model.embed_tokens", "model.norm"]
layer_modules = [
["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
["self_attn.o_proj"],
["mlp.up_proj", "mlp.gate_proj"],
["mlp.down_proj"],
]

class GraniteMoeGPTQForCausalLM(BaseGPTQModel):
"""Enable Granite MOE for GPTQ."""

layer_type = "GraniteMoeDecoderLayer"
layers_node = "model.layers"
base_modules = ["model.embed_tokens", "model.norm"]
layer_modules = [
["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
["self_attn.o_proj"],
["block_sparse_moe.input_linear", "block_sparse_moe.output_linear"],
]

# NOTE: Keys in this table are huggingface config."model_type" (see the corresponding field in
# config.json). Make sure you cover the ones in the model family you want to use,
# as they may not be under the same model_type. See Granite as an example.
custom_gptq_classes = {
# "granite": GraniteGPTQForCausalLM,
"granitemoe": GraniteMoeGPTQForCausalLM,
}
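As the NOTE above states, `custom_gptq_classes` is keyed by the HuggingFace `config.model_type`. A hedged sketch of how a caller might use the table (the `resolve_gptq_class` helper is hypothetical, not part of fms_mo):

```python
from transformers import AutoConfig

# custom_gptq_classes is only defined when gptqmodel is installed (see the
# guard above), so this import assumes that environment.
from fms_mo.utils.custom_gptq_models import custom_gptq_classes


def resolve_gptq_class(model_name_or_path: str):
    """Return the custom GPTQ wrapper for this checkpoint, or None to fall
    back to gptqmodel's built-in model map."""
    config = AutoConfig.from_pretrained(model_name_or_path)
    return custom_gptq_classes.get(config.model_type)  # e.g. "granitemoe"
```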