diff --git a/README.md b/README.md index a7348d2e..a27f65a4 100644 --- a/README.md +++ b/README.md @@ -104,8 +104,10 @@ The following optional dependencies are available: - `gptq`: `GPTQModel` package for W4A16 quantization - `mx`: `microxcaling` package for MX quantization - `opt`: Shortcut for `fp8`, `gptq`, and `mx` installs +- `aiu`: `ibm-fms` package for AIU model deployment - `torchvision`: `torch` package for image recognition training and inference - `triton`: `triton` package for matrix multiplication kernels +- `examples`: Dependencies needed for examples - `visualize`: Dependencies for visualizing models and performance data - `test`: Dependencies needed for unit testing - `dev`: Dependencies needed for development diff --git a/fms_mo/aiu_addons/gptq/gptq_aiu_adapter.py b/fms_mo/aiu_addons/gptq/gptq_aiu_adapter.py index bceb5415..8409828e 100644 --- a/fms_mo/aiu_addons/gptq/gptq_aiu_adapter.py +++ b/fms_mo/aiu_addons/gptq/gptq_aiu_adapter.py @@ -17,9 +17,21 @@ from typing import Mapping # Third Party -from fms.utils import serialization import torch +# Local +from fms_mo.utils.import_utils import available_packages + +if not available_packages["fms"]: + raise ImportError( + "AIU functionality requires ibm-fms to be installed. " + "See https://github.com/foundation-model-stack/foundation-model-stack for details." + ) + +# Third Party +# pylint: disable=import-error,wrong-import-position +from fms.utils import serialization + def _gptq_qweights_transpose_aiu( input_sd: Mapping[str, torch.Tensor], diff --git a/fms_mo/aiu_addons/gptq/gptq_aiu_linear.py b/fms_mo/aiu_addons/gptq/gptq_aiu_linear.py index 5bf8587a..d7e6f4cb 100644 --- a/fms_mo/aiu_addons/gptq/gptq_aiu_linear.py +++ b/fms_mo/aiu_addons/gptq/gptq_aiu_linear.py @@ -18,6 +18,19 @@ import math # Third Party +import torch + +# Local +from fms_mo.utils.import_utils import available_packages + +if not available_packages["fms"]: + raise ImportError( + "AIU functionality requires ibm-fms to be installed. "
+ "See https://github.com/foundation-model-stack/foundation-model-stack for details." + ) + +# Third Party +# pylint: disable=import-error,wrong-import-position,ungrouped-imports from fms.modules.linear import ( LinearModuleShardingInfo, LinearParameterShardingInfo, @@ -27,7 +40,6 @@ ) from fms.modules.tp import ShardType, TPModule from fms.utils.gptq import GPTQLinearConfig -import torch # Local from fms_mo.aiu_addons.gptq.gptq_aiu_op import register_aiu_gptq_op diff --git a/fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py b/fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py index 6efdca80..0e11ecbe 100644 --- a/fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py +++ b/fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py @@ -17,9 +17,21 @@ from typing import Mapping # Third Party -from fms.utils import serialization import torch +# Local +from fms_mo.utils.import_utils import available_packages + +if not available_packages["fms"]: + raise ImportError( + "AIU functionality requires ibm-fms to be installed. " + "See https://github.com/foundation-model-stack/foundation-model-stack for details." + ) + +# Third Party +# pylint: disable=import-error,wrong-import-position +from fms.utils import serialization + def _int8_qparams_aiu( input_sd: Mapping[str, torch.Tensor], diff --git a/fms_mo/aiu_addons/i8i8/i8i8_aiu_linear.py b/fms_mo/aiu_addons/i8i8/i8i8_aiu_linear.py index 04ddb153..72922edc 100644 --- a/fms_mo/aiu_addons/i8i8/i8i8_aiu_linear.py +++ b/fms_mo/aiu_addons/i8i8/i8i8_aiu_linear.py @@ -19,7 +19,17 @@ from typing import Any, Callable, Optional, Union import copy +# Local +from fms_mo.utils.import_utils import available_packages + +if not available_packages["fms"]: + raise ImportError( + "AIU functionality requires ibm-fms to be installed. " + "See https://github.com/foundation-model-stack/foundation-model-stack for details." 
+ ) + # Third Party +# pylint: disable=import-error,wrong-import-position,ungrouped-imports from fms.modules.linear import ( LinearModuleShardingInfo, LinearParameterShardingInfo, diff --git a/fms_mo/quant/ptq.py b/fms_mo/quant/ptq.py index dca1a9ef..7801284b 100644 --- a/fms_mo/quant/ptq.py +++ b/fms_mo/quant/ptq.py @@ -30,7 +30,6 @@ import sys # Third Party -from torch.utils.tensorboard import SummaryWriter from tqdm import tqdm import numpy as np import pandas as pd @@ -1449,49 +1448,57 @@ def ptq_mod_optim_lm(_model, m, layers, qcfg, optim_mode="both", **kwargs): # show loss on pbar pbar2.set_description(pbar_desc + f"{PTQloss:.6f}") - if isinstance(qcfg["tb_writer"], SummaryWriter) and isOutput: - scalars2log = {} - hist2log = {} + if available_packages["tensorboard"]: + # Third Party + from torch.utils.tensorboard import SummaryWriter - for k, v in loss4plot.items(): # plot loss - scalars2log[f"{mod_name}/PTQloss_{k}"] = v - for k, v in m.named_buffers(): # plot cv, delta, zp, alpha, and lr - if any(kb in k for kb in ["delta", "zero_point", "clip_val"]): - if len(v.shape) > 0 and v.shape[0] > 1: # perCh - hist2log[f"{mod_name}/{k}"] = v - else: - scalars2log[f"{mod_name}/{k}"] = v - for p, pname in zip( - optim_a.param_groups[0]["params"], param_names[1] - ): # cva - scalars2log[f"{mod_name}/{pname}"] = p.item() - scalars2log[f"{mod_name}/LR_cv_a"] = optim_a.param_groups[0]["lr"] - for p, pname in zip( - optim_w.param_groups[0]["params"], param_names[0] - ): # weights - hist2log[f"{mod_name}/{pname}"] = p - scalars2log[f"{mod_name}/LR_w"] = optim_w.param_groups[0]["lr"] - for p, pname in zip( - optim_w.param_groups[1]["params"], param_names[2] - ): # cvw - if "alpha" in pname: - hist2log[f"{mod_name}/{pname}"] = p - else: + if isinstance(qcfg["tb_writer"], SummaryWriter) and isOutput: + scalars2log = {} + hist2log = {} + + for k, v in loss4plot.items(): # plot loss + scalars2log[f"{mod_name}/PTQloss_{k}"] = v + for k, v in m.named_buffers(): # plot cv, delta, 
zp, alpha, and lr + if any(kb in k for kb in ["delta", "zero_point", "clip_val"]): + if len(v.shape) > 0 and v.shape[0] > 1: # perCh + hist2log[f"{mod_name}/{k}"] = v + else: + scalars2log[f"{mod_name}/{k}"] = v + for p, pname in zip( + optim_a.param_groups[0]["params"], param_names[1] + ): # cva scalars2log[f"{mod_name}/{pname}"] = p.item() - scalars2log[f"{mod_name}/LR_cvw"] = optim_w.param_groups[1]["lr"] - if "adaround" in qcfg["qw_mode"]: - scalars2log[f"{mod_name}/AdaR_beta"] = ( - loss_func.temp_decay.curr_beta - ) - for lidx, l in enumerate(layers): - if not hasattr(l, "quantize_m1"): - hist2log[f"{mod_name}/W{lidx}"] = l.weight + scalars2log[f"{mod_name}/LR_cv_a"] = optim_a.param_groups[0][ + "lr" + ] + for p, pname in zip( + optim_w.param_groups[0]["params"], param_names[0] + ): # weights + hist2log[f"{mod_name}/{pname}"] = p + scalars2log[f"{mod_name}/LR_w"] = optim_w.param_groups[0]["lr"] + for p, pname in zip( + optim_w.param_groups[1]["params"], param_names[2] + ): # cvw + if "alpha" in pname: + hist2log[f"{mod_name}/{pname}"] = p + else: + scalars2log[f"{mod_name}/{pname}"] = p.item() + scalars2log[f"{mod_name}/LR_cvw"] = optim_w.param_groups[1][ + "lr" + ] + if "adaround" in qcfg["qw_mode"]: + scalars2log[f"{mod_name}/AdaR_beta"] = ( + loss_func.temp_decay.curr_beta + ) + for lidx, l in enumerate(layers): + if not hasattr(l, "quantize_m1"): + hist2log[f"{mod_name}/W{lidx}"] = l.weight - # write every in one shot will mess up the folder, better write them one by one - for n, v in scalars2log.items(): - qcfg["tb_writer"].add_scalar(n, v, Niter) - for n, v in hist2log.items(): - qcfg["tb_writer"].add_histogram(n, v, Niter) + # write every in one shot will mess up the folder, better write them one by one + for n, v in scalars2log.items(): + qcfg["tb_writer"].add_scalar(n, v, Niter) + for n, v in hist2log.items(): + qcfg["tb_writer"].add_histogram(n, v, Niter) for s in scheduler: s.step() # we set up scheduler based on Nouterloop, not inner diff --git 
a/fms_mo/run_quant.py b/fms_mo/run_quant.py index a7a60b9b..8ed6084a 100644 --- a/fms_mo/run_quant.py +++ b/fms_mo/run_quant.py @@ -34,7 +34,6 @@ # Third Party from datasets import load_from_disk -from huggingface_hub.errors import HFValidationError from torch.cuda import OutOfMemoryError from transformers import AutoTokenizer import torch @@ -353,12 +352,6 @@ def main(): logger.error(traceback.format_exc()) write_termination_log(f"Unable to load file: {e}") sys.exit(USER_ERROR_EXIT_CODE) - except HFValidationError as e: - logger.error(traceback.format_exc()) - write_termination_log( - f"There may be a problem with loading the model. Exception: {e}" - ) - sys.exit(USER_ERROR_EXIT_CODE) except (TypeError, ValueError, EnvironmentError) as e: logger.error(traceback.format_exc()) write_termination_log( diff --git a/fms_mo/utils/import_utils.py b/fms_mo/utils/import_utils.py index 51b113ee..05f9e9a4 100644 --- a/fms_mo/utils/import_utils.py +++ b/fms_mo/utils/import_utils.py @@ -32,6 +32,7 @@ "fms", "triton", "torchvision", + "huggingface_hub", ] available_packages = {} diff --git a/pyproject.toml b/pyproject.toml index 4b43cf98..2410d4ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,29 +25,25 @@ dependencies = [ "numpy>=1.26.4,<2.3.0", "accelerate>=0.20.3,!=0.34,<1.7", "transformers>=4.45,<4.53", -"torch>=2.2.0,<2.6", +"torch>=2.2.0,<2.6", "tqdm>=4.66.2,<5.0", "datasets>=3.0.0,<4.0", -"ninja>=1.11.1.1,<2.0", -"tensorboard", -"notebook", -"evaluate", -"huggingface_hub", "pandas", "safetensors", -"ibm-fms>=0.0.8", "pkginfo>1.10", ] [project.optional-dependencies] +examples = ["ninja>=1.11.1.1,<2.0", "evaluate", "huggingface_hub"] fp8 = ["llmcompressor"] gptq = ["Cython", "gptqmodel>=1.7.3"] mx = ["microxcaling>=1.1"] opt = ["fms-model-optimizer[fp8, gptq, mx]"] +aiu = ["ibm-fms>=0.0.8"] torchvision = ["torchvision>=0.17"] flash-attn = ["flash-attn>=2.5.3,<3.0"] triton = ["triton>=3.0,<3.4"] -visualize = ["matplotlib", "graphviz", "pygraphviz"] +visualize = 
["matplotlib", "graphviz", "pygraphviz", "tensorboard", "notebook"] dev = ["pre-commit>=3.0.4,<5.0"] test = ["pytest", "pillow"]