Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,10 @@ The following optional dependencies are available:
- `gptq`: `GPTQModel` package for W4A16 quantization
- `mx`: `microxcaling` package for MX quantization
- `opt`: Shortcut for `fp8`, `gptq`, and `mx` installs
- `aiu`: `ibm-fms` package for AIU model deployment
- `torchvision`: `torchvision` package for image recognition training and inference
- `triton`: `triton` package for matrix multiplication kernels
- `examples`: Dependencies needed for examples
- `visualize`: Dependencies for visualizing models and performance data
- `test`: Dependencies needed for unit testing
- `dev`: Dependencies needed for development
Expand Down
14 changes: 13 additions & 1 deletion fms_mo/aiu_addons/gptq/gptq_aiu_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,21 @@
from typing import Mapping

# Third Party
from fms.utils import serialization
import torch

# Local
from fms_mo.utils.import_utils import available_packages

# Guard for the fms import that follows: raise a clear, actionable error
# when the ibm-fms package is not available.
if not available_packages["fms"]:
    raise ImportError(
        # Trailing space keeps the two implicitly-concatenated sentences apart.
        "AIU functionality requires ibm-fms to be installed. "
        "See https://github.com/foundation-model-stack/foundation-model-stack for details."
    )

# Third Party
# pylint: disable=import-error,wrong-import-position
from fms.utils import serialization


def _gptq_qweights_transpose_aiu(
input_sd: Mapping[str, torch.Tensor],
Expand Down
14 changes: 13 additions & 1 deletion fms_mo/aiu_addons/gptq/gptq_aiu_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,19 @@
import math

# Third Party
import torch

# Local
from fms_mo.utils.import_utils import available_packages

# Guard for the fms imports that follow: raise a clear, actionable error
# when the ibm-fms package is not available.
if not available_packages["fms"]:
    raise ImportError(
        # Trailing space keeps the two implicitly-concatenated sentences apart.
        "AIU functionality requires ibm-fms to be installed. "
        "See https://github.com/foundation-model-stack/foundation-model-stack for details."
    )

# Third Party
# pylint: disable=import-error,wrong-import-position,ungrouped-imports
from fms.modules.linear import (
LinearModuleShardingInfo,
LinearParameterShardingInfo,
Expand All @@ -27,7 +40,6 @@
)
from fms.modules.tp import ShardType, TPModule
from fms.utils.gptq import GPTQLinearConfig
import torch

# Local
from fms_mo.aiu_addons.gptq.gptq_aiu_op import register_aiu_gptq_op
Expand Down
14 changes: 13 additions & 1 deletion fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,21 @@
from typing import Mapping

# Third Party
from fms.utils import serialization
import torch

# Local
from fms_mo.utils.import_utils import available_packages

# Guard for the fms import that follows: raise a clear, actionable error
# when the ibm-fms package is not available.
if not available_packages["fms"]:
    raise ImportError(
        # Trailing space keeps the two implicitly-concatenated sentences apart.
        "AIU functionality requires ibm-fms to be installed. "
        "See https://github.com/foundation-model-stack/foundation-model-stack for details."
    )

# Third Party
# pylint: disable=import-error,wrong-import-position
from fms.utils import serialization


def _int8_qparams_aiu(
input_sd: Mapping[str, torch.Tensor],
Expand Down
10 changes: 10 additions & 0 deletions fms_mo/aiu_addons/i8i8/i8i8_aiu_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,17 @@
from typing import Any, Callable, Optional, Union
import copy

# Local
from fms_mo.utils.import_utils import available_packages

# Guard for the fms imports that follow: raise a clear, actionable error
# when the ibm-fms package is not available.
if not available_packages["fms"]:
    raise ImportError(
        # Trailing space keeps the two implicitly-concatenated sentences apart.
        "AIU functionality requires ibm-fms to be installed. "
        "See https://github.com/foundation-model-stack/foundation-model-stack for details."
    )

# Third Party
# pylint: disable=import-error,wrong-import-position,ungrouped-imports
from fms.modules.linear import (
LinearModuleShardingInfo,
LinearParameterShardingInfo,
Expand Down
89 changes: 48 additions & 41 deletions fms_mo/quant/ptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import sys

# Third Party
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -1449,49 +1448,57 @@ def ptq_mod_optim_lm(_model, m, layers, qcfg, optim_mode="both", **kwargs):
# show loss on pbar
pbar2.set_description(pbar_desc + f"{PTQloss:.6f}")

if isinstance(qcfg["tb_writer"], SummaryWriter) and isOutput:
scalars2log = {}
hist2log = {}
if available_packages["tensorboard"]:
# Third Party
from torch.utils.tensorboard import SummaryWriter

for k, v in loss4plot.items(): # plot loss
scalars2log[f"{mod_name}/PTQloss_{k}"] = v
for k, v in m.named_buffers(): # plot cv, delta, zp, alpha, and lr
if any(kb in k for kb in ["delta", "zero_point", "clip_val"]):
if len(v.shape) > 0 and v.shape[0] > 1: # perCh
hist2log[f"{mod_name}/{k}"] = v
else:
scalars2log[f"{mod_name}/{k}"] = v
for p, pname in zip(
optim_a.param_groups[0]["params"], param_names[1]
): # cva
scalars2log[f"{mod_name}/{pname}"] = p.item()
scalars2log[f"{mod_name}/LR_cv_a"] = optim_a.param_groups[0]["lr"]
for p, pname in zip(
optim_w.param_groups[0]["params"], param_names[0]
): # weights
hist2log[f"{mod_name}/{pname}"] = p
scalars2log[f"{mod_name}/LR_w"] = optim_w.param_groups[0]["lr"]
for p, pname in zip(
optim_w.param_groups[1]["params"], param_names[2]
): # cvw
if "alpha" in pname:
hist2log[f"{mod_name}/{pname}"] = p
else:
if isinstance(qcfg["tb_writer"], SummaryWriter) and isOutput:
scalars2log = {}
hist2log = {}

for k, v in loss4plot.items(): # plot loss
scalars2log[f"{mod_name}/PTQloss_{k}"] = v
for k, v in m.named_buffers(): # plot cv, delta, zp, alpha, and lr
if any(kb in k for kb in ["delta", "zero_point", "clip_val"]):
if len(v.shape) > 0 and v.shape[0] > 1: # perCh
hist2log[f"{mod_name}/{k}"] = v
else:
scalars2log[f"{mod_name}/{k}"] = v
for p, pname in zip(
optim_a.param_groups[0]["params"], param_names[1]
): # cva
scalars2log[f"{mod_name}/{pname}"] = p.item()
scalars2log[f"{mod_name}/LR_cvw"] = optim_w.param_groups[1]["lr"]
if "adaround" in qcfg["qw_mode"]:
scalars2log[f"{mod_name}/AdaR_beta"] = (
loss_func.temp_decay.curr_beta
)
for lidx, l in enumerate(layers):
if not hasattr(l, "quantize_m1"):
hist2log[f"{mod_name}/W{lidx}"] = l.weight
scalars2log[f"{mod_name}/LR_cv_a"] = optim_a.param_groups[0][
"lr"
]
for p, pname in zip(
optim_w.param_groups[0]["params"], param_names[0]
): # weights
hist2log[f"{mod_name}/{pname}"] = p
scalars2log[f"{mod_name}/LR_w"] = optim_w.param_groups[0]["lr"]
for p, pname in zip(
optim_w.param_groups[1]["params"], param_names[2]
): # cvw
if "alpha" in pname:
hist2log[f"{mod_name}/{pname}"] = p
else:
scalars2log[f"{mod_name}/{pname}"] = p.item()
scalars2log[f"{mod_name}/LR_cvw"] = optim_w.param_groups[1][
"lr"
]
if "adaround" in qcfg["qw_mode"]:
scalars2log[f"{mod_name}/AdaR_beta"] = (
loss_func.temp_decay.curr_beta
)
for lidx, l in enumerate(layers):
if not hasattr(l, "quantize_m1"):
hist2log[f"{mod_name}/W{lidx}"] = l.weight

# writing everything in one shot will mess up the folder; better to write them one by one
for n, v in scalars2log.items():
qcfg["tb_writer"].add_scalar(n, v, Niter)
for n, v in hist2log.items():
qcfg["tb_writer"].add_histogram(n, v, Niter)
# writing everything in one shot will mess up the folder; better to write them one by one
for n, v in scalars2log.items():
qcfg["tb_writer"].add_scalar(n, v, Niter)
for n, v in hist2log.items():
qcfg["tb_writer"].add_histogram(n, v, Niter)

for s in scheduler:
s.step() # we set up scheduler based on Nouterloop, not inner
Expand Down
7 changes: 0 additions & 7 deletions fms_mo/run_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

# Third Party
from datasets import load_from_disk
from huggingface_hub.errors import HFValidationError
from torch.cuda import OutOfMemoryError
from transformers import AutoTokenizer
import torch
Expand Down Expand Up @@ -353,12 +352,6 @@ def main():
logger.error(traceback.format_exc())
write_termination_log(f"Unable to load file: {e}")
sys.exit(USER_ERROR_EXIT_CODE)
except HFValidationError as e:
logger.error(traceback.format_exc())
write_termination_log(
f"There may be a problem with loading the model. Exception: {e}"
)
sys.exit(USER_ERROR_EXIT_CODE)
except (TypeError, ValueError, EnvironmentError) as e:
logger.error(traceback.format_exc())
write_termination_log(
Expand Down
1 change: 1 addition & 0 deletions fms_mo/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"fms",
"triton",
"torchvision",
"huggingface_hub",
]

available_packages = {}
Expand Down
12 changes: 4 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,25 @@ dependencies = [
"numpy>=1.26.4,<2.3.0",
"accelerate>=0.20.3,!=0.34,<1.7",
"transformers>=4.45,<4.53",
"torch>=2.2.0,<2.6",
"torch>=2.2.0,<2.6",
"tqdm>=4.66.2,<5.0",
"datasets>=3.0.0,<4.0",
"ninja>=1.11.1.1,<2.0",
"tensorboard",
"notebook",
"evaluate",
"huggingface_hub",
"pandas",
"safetensors",
"ibm-fms>=0.0.8",
"pkginfo>1.10",
]

[project.optional-dependencies]
examples = ["ninja>=1.11.1.1,<2.0", "evaluate", "huggingface_hub"]
fp8 = ["llmcompressor"]
gptq = ["Cython", "gptqmodel>=1.7.3"]
mx = ["microxcaling>=1.1"]
opt = ["fms-model-optimizer[fp8, gptq, mx]"]
aiu = ["ibm-fms>=0.0.8"]
torchvision = ["torchvision>=0.17"]
flash-attn = ["flash-attn>=2.5.3,<3.0"]
triton = ["triton>=3.0,<3.4"]
visualize = ["matplotlib", "graphviz", "pygraphviz"]
visualize = ["matplotlib", "graphviz", "pygraphviz", "tensorboard", "notebook"]
dev = ["pre-commit>=3.0.4,<5.0"]
test = ["pytest", "pillow"]

Expand Down
Loading