From f8998f4a8faa96dd7ae615e5431bc40f0893ffdf Mon Sep 17 00:00:00 2001
From: Rahul Tuli
Date: Fri, 10 Jan 2025 15:38:50 +0000
Subject: [PATCH 1/3] Update: test for new transformers release

---
 tests/test_quantization/lifecycle/test_apply.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index edcc2919..412ff6cc 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -31,8 +31,9 @@
     is_sparse_target,
 )
 from compressed_tensors.quantization.utils import iter_named_leaf_modules
+from packaging import version
 from tests.testing_utils import requires_accelerate
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, __version__


 @pytest.fixture
@@ -138,7 +139,12 @@ def test_apply_quantization_config_tinyllama():
     # sanity check correct number of layers targeted
     assert num_linears == 154  # 155 Linear layers - 1 that gets ignored
     assert num_embeddings == 1
-    assert num_rotary_embeddings == 23  # model updated, now has model.rotary_embedding
+
+    # Handle num_rotary_embeddings based on transformers version
+    if version.parse(__version__) < version.parse("4.48"):
+        assert num_rotary_embeddings == 23
+    else:
+        assert num_rotary_embeddings == 1

     # test quantization compression
     # sample forward pass to fill scales, zps
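The version gate introduced above can also be read as a standalone helper. Below is a minimal sketch of the same pattern, assuming `transformers` and `packaging` are importable; the helper name is illustrative and not part of the patch:

    from packaging import version
    import transformers

    def expected_rotary_embedding_count() -> int:
        # Mirrors the assertions in the patch: older transformers releases
        # expose per-layer rotary embeddings (23 for this TinyLlama config),
        # while 4.48+ keeps a single model-level instance.
        if version.parse(transformers.__version__) < version.parse("4.48"):
            return 23
        return 1
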
From 0a33bc25b1fe15227b368af8e9a7f4046d488343 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Fri, 10 Jan 2025 10:44:50 -0500
Subject: [PATCH 2/3] fix bug

---
 .../test_quantization/lifecycle/test_apply.py | 37 ++++++++++---------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index edcc2919..ee77fc0b 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import re
+from collections import defaultdict
 from typing import Optional
 from unittest.mock import MagicMock

@@ -114,31 +115,33 @@ def test_apply_quantization_config_tinyllama():
     for module in model.modules():
         _test_layer_quantization_status(module, inputs=False, weights=False)

+    count_layer_names = ("Linear", "Embedding", "LlamaRotaryEmbedding")
+    count_layer_num = defaultdict(int)
+
+    for name, module in model.named_modules():
+        if name in quant_config.ignore:
+            continue
+        module_type = module.__class__.__name__
+        if module_type in count_layer_names:
+            count_layer_num[module_type] += 1
+
     # apply quant config to model
     apply_quantization_config(model, quant_config)

     # check for correct application of quant config
-    num_linears = 0
-    num_embeddings = 0
-    num_rotary_embeddings = 0
     for name, module in model.named_modules():
         if name in quant_config.ignore:
             continue
         module_type = module.__class__.__name__
-        if module_type == "Linear":
-            num_linears += 1
-            _test_layer_quantization_status(module, inputs=True, weights=True)
-        elif module_type == "Embedding":
-            num_embeddings += 1
-            _test_layer_quantization_status(module, inputs=False, weights=True)
-        elif module_type == "LlamaRotaryEmbedding":
-            num_rotary_embeddings += 1
-            _test_layer_quantization_status(module, inputs=False, weights=False)
-
-    # sanity check correct number of layers targeted
-    assert num_linears == 154  # 155 Linear layers - 1 that gets ignored
-    assert num_embeddings == 1
-    assert num_rotary_embeddings == 23  # model updated, now has model.rotary_embedding
+        if module_type in count_layer_names:
+            count_layer_num[module_type] -= 1
+            _inputs = module_type == "Linear"
+            _weights = not module_type == "LlamaRotaryEmbedding"
+            _test_layer_quantization_status(module, inputs=_inputs, weights=_weights)
+
+    assert all(
+        value == 0 for value in count_layer_num.values()
+    ), "Not all values are zero"

     # test quantization compression
     # sample forward pass to fill scales, zps

From da7db8d83771035c50127e8a4355fb6ac8613f57 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Fri, 10 Jan 2025 10:55:38 -0500
Subject: [PATCH 3/3] check layers exist with counting

---
 tests/test_quantization/lifecycle/test_apply.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index ee77fc0b..241a2276 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -125,6 +125,9 @@ def test_apply_quantization_config_tinyllama():
         if module_type in count_layer_names:
             count_layer_num[module_type] += 1

+    assert len(count_layer_num) > 0, f"None of {count_layer_names} found in model"
+    assert all(value > 0 for value in count_layer_num.values())
+
     # apply quant config to model
     apply_quantization_config(model, quant_config)
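Taken together, the second and third patches replace the hard-coded per-type counters with a tally that is filled before quantization is applied and drained afterwards, plus a guard that the targeted layer types were actually found. A minimal, self-contained sketch of that count-up/count-down idea on a toy torch model (module and variable names here are illustrative, not taken from the test):

    from collections import defaultdict

    import torch.nn as nn

    target_types = ("Linear", "Embedding")
    model = nn.Sequential(nn.Linear(8, 8), nn.Embedding(10, 8))

    # tally target modules before the transformation under test
    counts = defaultdict(int)
    for _, module in model.named_modules():
        if module.__class__.__name__ in target_types:
            counts[module.__class__.__name__] += 1

    assert len(counts) > 0, f"None of {target_types} found in model"
    assert all(value > 0 for value in counts.values())

    # after the transformation, each visited target cancels one expected hit
    for _, module in model.named_modules():
        if module.__class__.__name__ in counts:
            counts[module.__class__.__name__] -= 1

    assert all(value == 0 for value in counts.values()), "Not all values are zero"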