From f8998f4a8faa96dd7ae615e5431bc40f0893ffdf Mon Sep 17 00:00:00 2001
From: Rahul Tuli
Date: Fri, 10 Jan 2025 15:38:50 +0000
Subject: [PATCH 1/3] Update: test for new transformers release

---
 tests/test_quantization/lifecycle/test_apply.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index edcc2919..412ff6cc 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -31,8 +31,9 @@
     is_sparse_target,
 )
 from compressed_tensors.quantization.utils import iter_named_leaf_modules
+from packaging import version
 from tests.testing_utils import requires_accelerate
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, __version__


 @pytest.fixture
@@ -138,7 +139,12 @@ def test_apply_quantization_config_tinyllama():
     # sanity check correct number of layers targeted
     assert num_linears == 154  # 155 Linear layers - 1 that gets ignored
     assert num_embeddings == 1
-    assert num_rotary_embeddings == 23  # model updated, now has model.rotary_embedding
+
+    # Handle num_rotary_embeddings based on transformers version
+    if version.parse(__version__) < version.parse("4.48"):
+        assert num_rotary_embeddings == 23
+    else:
+        assert num_rotary_embeddings == 1

     # test quantization compression
     # sample forward pass to fill scales, zps
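The version gate introduced above can also be read as a standalone helper. Below is a minimal sketch of the same pattern, assuming `transformers` and `packaging` are importable; the helper name is illustrative and not part of the patch:

    from packaging import version
    import transformers

    def expected_rotary_embedding_count() -> int:
        # Mirrors the assertions in the patch: older transformers releases
        # expose per-layer rotary embeddings (23 for this TinyLlama config),
        # while 4.48+ keeps a single model-level instance.
        if version.parse(transformers.__version__) < version.parse("4.48"):
            return 23
        return 1
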
From 0a33bc25b1fe15227b368af8e9a7f4046d488343 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Fri, 10 Jan 2025 10:44:50 -0500
Subject: [PATCH 2/3] fix bug

---
 .../test_quantization/lifecycle/test_apply.py | 37 ++++++++++---------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index edcc2919..ee77fc0b 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import re
+from collections import defaultdict
 from typing import Optional
 from unittest.mock import MagicMock

@@ -114,31 +115,33 @@ def test_apply_quantization_config_tinyllama():
     for module in model.modules():
         _test_layer_quantization_status(module, inputs=False, weights=False)

+    count_layer_names = ("Linear", "Embedding", "LlamaRotaryEmbedding")
+    count_layer_num = defaultdict(int)
+
+    for name, module in model.named_modules():
+        if name in quant_config.ignore:
+            continue
+        module_type = module.__class__.__name__
+        if module_type in count_layer_names:
+            count_layer_num[module_type] += 1
+
     # apply quant config to model
     apply_quantization_config(model, quant_config)

     # check for correct application of quant config
-    num_linears = 0
-    num_embeddings = 0
-    num_rotary_embeddings = 0
     for name, module in model.named_modules():
         if name in quant_config.ignore:
             continue
         module_type = module.__class__.__name__
-        if module_type == "Linear":
-            num_linears += 1
-            _test_layer_quantization_status(module, inputs=True, weights=True)
-        elif module_type == "Embedding":
-            num_embeddings += 1
-            _test_layer_quantization_status(module, inputs=False, weights=True)
-        elif module_type == "LlamaRotaryEmbedding":
-            num_rotary_embeddings += 1
-            _test_layer_quantization_status(module, inputs=False, weights=False)
-
-    # sanity check correct number of layers targeted
-    assert num_linears == 154  # 155 Linear layers - 1 that gets ignored
-    assert num_embeddings == 1
-    assert num_rotary_embeddings == 23  # model updated, now has model.rotary_embedding
+        if module_type in count_layer_names:
+            count_layer_num[module_type] -= 1
+            _inputs = module_type == "Linear"
+            _weights = not module_type == "LlamaRotaryEmbedding"
+            _test_layer_quantization_status(module, inputs=_inputs, weights=_weights)
+
+    assert all(
+        value == 0 for value in count_layer_num.values()
+    ), "Not all values are zero"

     # test quantization compression
     # sample forward pass to fill scales, zps

From da7db8d83771035c50127e8a4355fb6ac8613f57 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Fri, 10 Jan 2025 10:55:38 -0500
Subject: [PATCH 3/3] check layers exist with counting

---
 tests/test_quantization/lifecycle/test_apply.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index ee77fc0b..241a2276 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -125,6 +125,9 @@ def test_apply_quantization_config_tinyllama():
         if module_type in count_layer_names:
             count_layer_num[module_type] += 1

+    assert len(count_layer_num) > 0, f"None of {count_layer_names} found in model"
+    assert all(value > 0 for value in count_layer_num.values())
+
     # apply quant config to model
     apply_quantization_config(model, quant_config)
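Taken together, the second and third patches replace the hard-coded per-type counters with a tally that is filled before quantization is applied and drained afterwards, plus a guard that the targeted layer types were actually found. A minimal, self-contained sketch of that count-up/count-down idea on a toy torch model (module and variable names here are illustrative, not taken from the test):

    from collections import defaultdict

    import torch.nn as nn

    target_types = ("Linear", "Embedding")
    model = nn.Sequential(nn.Linear(8, 8), nn.Embedding(10, 8))

    # tally target modules before the transformation under test
    counts = defaultdict(int)
    for _, module in model.named_modules():
        if module.__class__.__name__ in target_types:
            counts[module.__class__.__name__] += 1

    assert len(counts) > 0, f"None of {target_types} found in model"
    assert all(value > 0 for value in counts.values())

    # after the transformation, each visited target cancels one expected hit
    for _, module in model.named_modules():
        if module.__class__.__name__ in counts:
            counts[module.__class__.__name__] -= 1

    assert all(value == 0 for value in counts.values()), "Not all values are zero"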