diff --git a/src/transformers/models/telechat2/modeling_telechat2.py b/src/transformers/models/telechat2/modeling_telechat2.py index 42f4bf35675b4a..e4290d9931237e 100644 --- a/src/transformers/models/telechat2/modeling_telechat2.py +++ b/src/transformers/models/telechat2/modeling_telechat2.py @@ -373,6 +373,7 @@ def _dynamic_frequency_update(self, position_ids, device): if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len: # reset # This .to() is needed if the model has been moved to a device after being initialized (because # the buffer is automatically moved, but not the original copy) + self.original_inv_freq = self.original_inv_freq.to(device) self.register_buffer("inv_freq", self.original_inv_freq, persistent=False) self.max_seq_len_cached = self.original_max_seq_len