NVIDIA · sugunav14 · Apr 8, 2026 · Feb 6, 2026 · Feb 13, 2026 · Feb 4, 2026
@@ -75,6 +75,7 @@ repos:
         # Instead, we should manually add the license header to those files *after* the original header.
         exclude: >
           (?x)^(
+              modelopt/torch/quantization/utils/calib_utils.py|
               modelopt/onnx/quantization/operators.py|
               modelopt/onnx/quantization/ort_patching.py|
               modelopt/torch/_deploy/utils/onnx_utils.py|

@@ -1503,24 +1503,20 @@ class SVDQuantConfig(QuantizeAlgorithmConfig):
     )
 
 
-class GPTQLiteConfig(QuantizeAlgorithmConfig):
-    """The config for GPTQ lite.
+class GPTQCalibConfig(QuantizeAlgorithmConfig):
+    """The config for GPTQ quantization.
 
-    GPTQ lite is a variant of GPTQ that does not exactly follow the official GPTQ implementation.
-
-    GPTQ lite does not perform sequential quantization of layers. This means that the updated
-    activations are not used to process the next layer.
+    GPTQ minimizes the layer-wise quantization error by using second-order (Hessian) information
+    to perform blockwise weight updates that compensate for rounding loss. Layers are quantized
+    sequentially so that each layer's Hessian is computed from activations that already reflect
+    the quantization of preceding layers.
 
     The default values are taken from the official GPTQ implementation:
     https://github.com/IST-DASLab/FP-Quant/blob/d2e3092f968262c4de5fb050e1aef568a280dadd/src/quantization/gptq.py#L35
-
-    Note: This feature is currently experimental and may not translate to improved accuracy as expected.
-
-
     """
 
-    method: Literal["gptq_lite"] = ModeloptField("gptq_lite")
-    percdamp: float | None = ModeloptField(
+    method: Literal["gptq"] = ModeloptField("gptq")
+    perc_damp: float | None = ModeloptField(
         default=0.01,
         gt=0.0,
         le=1.0,
@@ -1533,12 +1529,6 @@ class GPTQLiteConfig(QuantizeAlgorithmConfig):
         description="""The block size for GPTQ weight update, which must be a multiple of the
         group_size used in the quantization.""",
     )
-    hessian_state_path: str | None = ModeloptField(
-        default=None,
-        title="Path to the Hessian state file.",
-        description="""The path to the Hessian state file. If hessian path exists, we load from
-         hessian file instead of recomputing them.""",
-    )
 
 
 QuantizeQuantCfgType = list[QuantizerCfgEntry]

@@ -37,7 +37,7 @@
     AWQFullCalibConfig,
     AWQLiteCalibConfig,
     CompressConfig,
-    GPTQLiteConfig,
+    GPTQCalibConfig,
     LocalHessianCalibConfig,
     MaxCalibConfig,
     MseCalibConfig,
@@ -59,7 +59,7 @@
 )
 from .model_calib import (
     awq,
-    gptq_lite,
+    gptq,
     local_hessian_calibrate,
     max_calibrate,
     mse_calibrate,
@@ -240,8 +240,8 @@ def wrapped_calib_func(
         if sequential:
             if forward_loop is None:
                 raise ValueError("forward_loop is required for calibration but got None.")
-            assert method in ["max"], (
-                f"Sequential calibration currently only supports max calibration, got {method}"
+            assert method in ["max", "gptq"], (
+                f"Sequential calibration currently only supports max and gptq calibration, got {method}"
             )
             # Wrap with sequential processing
             sequential_calibrate(
@@ -493,12 +493,12 @@ def restore(self) -> RestoreEntrypoint:
 
 
 @CalibrateModeRegistry.register_mode
-class GPTQLiteModeDescriptor(BaseCalibrateModeDescriptor):
+class GPTQModeDescriptor(BaseCalibrateModeDescriptor):
     """Mode for GPTQ calibration algorithm."""
 
     @property
     def config_class(self) -> type[QuantizeAlgorithmConfig]:
         """Specifies the config class for the mode."""
-        return GPTQLiteConfig
+        return GPTQCalibConfig
 
-    _calib_func = gptq_lite
+    _calib_func = gptq