Skip to content

Commit dbd0197

Browse files
authored
Reset cache logic of weight workspace for NVFP4TensorStorage (#2524)
Reset the weight workspace cache for NVFP4TensorStorage. Signed-off-by: Jinhang Choi <jinhangc@nvidia.com>
1 parent eac8af6 commit dbd0197

File tree

1 file changed

+6
-0
lines changed
  • transformer_engine/pytorch/module

1 file changed

+6
-0
lines changed

transformer_engine/pytorch/module/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from ..tensor.float8_blockwise_tensor import Float8BlockQuantizer
4646
from ..tensor.storage.float8_tensor_storage import Float8TensorStorage
4747
from ..tensor.storage.mxfp8_tensor_storage import MXFP8TensorStorage
48+
from ..tensor.storage.nvfp4_tensor_storage import NVFP4TensorStorage
4849
from ..utils import (
4950
is_non_tn_fp8_gemm_supported,
5051
torch_get_autocast_gpu_dtype,
@@ -1388,6 +1389,11 @@ def get_weight_workspace(
13881389
reset_cache = True
13891390
elif quantizer.columnwise_usage and out._columnwise_data is None:
13901391
reset_cache = True
1392+
elif isinstance(out, NVFP4TensorStorage):
1393+
if quantizer.rowwise_usage and out._rowwise_data is None:
1394+
reset_cache = True
1395+
elif quantizer.columnwise_usage and out._columnwise_data is None:
1396+
reset_cache = True
13911397
if isinstance(out, DebugQuantizedTensor) != isinstance(quantizer, DebugQuantizer):
13921398
reset_cache = True
13931399
if reset_cache:

0 commit comments

Comments (0)