Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Divide as a * (1.0 / b) during weight compression #3055

Open
wants to merge 21 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def estimate_scales(weight: Tensor, target: Tensor, zero_mask: Tensor, importanc
:param importance: The importance values tensor.
:return: The estimated scales
"""
ideal_scale = fns.abs(weight) / (fns.abs(target) + zero_mask)
ideal_scale = fns.abs(weight) * fns.reciprocal(fns.abs(target) + zero_mask)
alexsu52 marked this conversation as resolved.
Show resolved Hide resolved
weighted_scale = ideal_scale * importance
near_to_ideal_scale = fns.sum(weighted_scale, axis=2, keepdims=True)
return near_to_ideal_scale
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def calculate_signed_scale(weight: Tensor, reduction_axes: ReductionAxes, num_bi
w_max = fns.max(weight, axis=reduction_axes, keepdims=True)

scale = fns.where(w_abs_min >= w_max, w_abs_min, -w_max)
scale /= level_high
scale *= fns.reciprocal(level_high)

eps = fns.finfo(scale).eps
scale = fns.where(fns.abs(scale) < eps, eps, scale)
Expand Down Expand Up @@ -286,7 +286,6 @@ def calculate_quantized_weight(
config: WeightCompressionConfig,
scale: Tensor,
zero_point: Optional[Tensor] = None,
invert_scale=False,
) -> Tensor:
"""
Quantizes the weight tensor using the provided scale and zero point.
Expand All @@ -295,7 +294,6 @@ def calculate_quantized_weight(
:param config: Weight compression configuration.
:param scale: Scale tensor used for quantization.
:param zero_point: Zero point tensor used for quantization.
:param invert_scale: applies inversion for scale and then multiply by weights instead of division.
:return: Quantized weight tensor of uint8 or int8 type.
"""
if weight.dtype != TensorDataType.float32:
Expand All @@ -309,11 +307,7 @@ def calculate_quantized_weight(
level_low = 0 if asym_quant else -(2 ** (num_bits - 1))
level_high = 2**num_bits - 1 if asym_quant else 2 ** (num_bits - 1) - 1

if invert_scale:
scale = fns.power(scale, -1)
compressed_weights = weight * scale
else:
compressed_weights = weight / scale
compressed_weights = weight * fns.reciprocal(scale)
if zero_point is not None:
compressed_weights += zero_point.astype(weight.dtype)
compressed_weights = fns.round(compressed_weights)
Expand All @@ -328,7 +322,6 @@ def do_int_quantization(
config: WeightCompressionConfig,
precomputed_scale: Tensor = None,
precomputed_zero_point: Tensor = None,
invert_scale=False,
) -> Tuple[Tensor, Tensor, Tensor]:
"""
The method quantizes the given weights to integer data type uniformly in accordance with the compression config.
Expand All @@ -351,8 +344,6 @@ def do_int_quantization(
:param config: Information on how to compress (quantize) a specific weight.
:param precomputed_scale: Precomputed scale.
:param precomputed_zero_point: Precomputed zero point.
:param invert_scale: applies inversion for scale and then multiply by weights instead of division.
Need as reference implementation for OV.
:return: The compressed weights tensor of uint8 (asymmetric mode) or int8 (symmetric mode) type,
scale tensor of float32 type and zero point tensor of int32 type that was used for its quantization.
"""
Expand Down Expand Up @@ -380,7 +371,7 @@ def do_int_quantization(
if precomputed_zero_point is not None:
zero_point = precomputed_zero_point

compressed_weights = calculate_quantized_weight(weight, config, scale, zero_point, invert_scale)
compressed_weights = calculate_quantized_weight(weight, config, scale, zero_point)
return compressed_weights, scale, zero_point


Expand Down
4 changes: 2 additions & 2 deletions nncf/quantization/fake_quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,11 @@ def calculate_scale_zero_point(
:return: Scale and Zero point values.
"""
levels = level_high - level_low if narrow_range else level_high - level_low + 1
scale = ((input_high - input_low) / (levels - 1)).astype(TensorDataType.float32)
scale = (input_high - input_low) * fns.reciprocal(levels - 1)
eps = fns.finfo(scale).eps
# NOTE: adding machine epsilon to avoid division by zero
scale = fns.where(fns.abs(scale) < eps, eps, scale)
expected_level_low = level_low + 1 if narrow_range else level_low
zero_point = expected_level_low - fns.round(input_low / scale)
zero_point = expected_level_low - fns.round(input_low * fns.reciprocal(scale))
zero_point = fns.clip(zero_point.astype(TensorDataType.int32), level_low, level_high)
return scale, zero_point
1 change: 1 addition & 0 deletions nncf/tensor/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from nncf.tensor.functions.numeric import percentile as percentile
from nncf.tensor.functions.numeric import power as power
from nncf.tensor.functions.numeric import quantile as quantile
from nncf.tensor.functions.numeric import reciprocal as reciprocal
from nncf.tensor.functions.numeric import reshape as reshape
from nncf.tensor.functions.numeric import round as round
from nncf.tensor.functions.numeric import searchsorted as searchsorted
Expand Down
14 changes: 14 additions & 0 deletions nncf/tensor/functions/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,3 +905,17 @@ def ceil(a: Tensor) -> Tensor:
:return: An array of the same type as a, containing the ceiling values.
"""
return Tensor(ceil(a.data))


@functools.singledispatch
@tensor_guard
def reciprocal(a: Union[Tensor, float]) -> Tensor:
    """
    Return the element-wise reciprocal of the input, i.e. `1.0 / a`.

    The division result is wrapped into a new `Tensor`; the input is not modified in place.

    :param a: The input tensor or float whose reciprocal is taken.
    :return: A tensor holding `1.0 / a` for each element of `a`.
    """
    # NOTE(review): the neighbouring `ceil` unwraps `a.data` and re-dispatches on the backend
    # type, while this generic implementation divides `a` directly — confirm this is intended.
    inverted = 1.0 / a
    return Tensor(inverted)
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"ipow": operator.ipow,
"itruediv": operator.itruediv,
"ifloordiv": operator.ifloordiv,
"reciprocal": lambda a, _: 1.0 / a,
}
BINARY_OPERATORS = ["add", "sub", "pow", "mul", "truediv", "floordiv"]

Expand Down
45 changes: 30 additions & 15 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,32 +1078,47 @@ def test_mixed_precision_e2m1(mode, all_layers, ratio, ref_ids):
assert ref_e8m0_nodes == names_e8m0


@pytest.mark.parametrize("mode", (CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM))
def test_np_ov_compression_decompression(mode):
sz = 60
w = np.arange(-sz, sz).reshape(2, sz).astype(np.float32) / 9.0
@pytest.mark.parametrize("mode", [CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM])
@pytest.mark.parametrize(
"w,s,zp",
[
(
np.array([[1.4372410774230957]], np.float32),
np.array([[-0.9581607580184937]], np.float32),
np.array([[1]], np.int32),
),
(np.arange(-60, 60).reshape(2, 60).astype(np.float32) / 9.0, None, None),
],
)
def test_np_ov_compression_decompression(mode, w, s, zp):
w = Tensor(w)
if s is not None:
s = Tensor(s)
if mode == CompressWeightsMode.INT4_SYM:
zp = None
if zp is not None:
zp = Tensor(zp)

config = WeightCompressionConfig(mode)

compressed_weighs, scale, zp = do_int_quantization(w, -1, config, invert_scale=True)
decompressed_weighs = do_int_dequantization(compressed_weighs, scale, zp)
compressed_weights, s, zp = do_int_quantization(w, -1, config, precomputed_scale=s, precomputed_zero_point=zp)
decompressed_weights = do_int_dequantization(compressed_weights, s, zp)

compressed_weighs = compressed_weighs.data
decompressed_weighs = decompressed_weighs.data
compressed_weights = compressed_weights.data
decompressed_weights = decompressed_weights.data
zp_shape = zp.shape if zp is not None else None

compress = OVWeightCompressionAlgoBackend.get_compress_pipeline(config, w.shape, scale.shape, zp_shape)
compress = OVWeightCompressionAlgoBackend.get_compress_pipeline(config, w.shape, s.shape, zp_shape)
compress_decompress = OVWeightCompressionAlgoBackend.get_compress_decompress_pipeline(
config, w.shape, scale.shape, zp_shape
config, w.shape, s.shape, zp_shape
)

params = [w.data, scale.data, zp.data] if zp is not None else [w.data, scale.data]
compressed_weighs_ov = compress(params)
decompressed_weighs_ov = compress_decompress(params)
params = [w.data, s.data, zp.data] if zp is not None else [w.data, s.data]
compressed_weights_ov = compress(params)
decompressed_weights_ov = compress_decompress(params)

assert np.allclose(compressed_weighs, compressed_weighs_ov)
assert np.allclose(decompressed_weighs, decompressed_weighs_ov)
assert np.allclose(compressed_weights, compressed_weights_ov, atol=0)
assert np.allclose(decompressed_weights, decompressed_weights_ov, atol=0)


@pytest.mark.parametrize(
Expand Down
Loading