From 7abd94ef94ac167c2964a59a889e499335caddf3 Mon Sep 17 00:00:00 2001
From: hellozhaoming <747247642@qq.com>
Date: Fri, 27 Oct 2023 17:16:40 +0800
Subject: [PATCH 1/3] add swish activation function

Signed-off-by: hellozhaoming <747247642@qq.com>
---
 deepmd/common.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deepmd/common.py b/deepmd/common.py
index 472508bb08..afa2004731 100644
--- a/deepmd/common.py
+++ b/deepmd/common.py
@@ -125,6 +125,7 @@ def gelu_wrapper(x):
     "softplus": tf.nn.softplus,
     "sigmoid": tf.sigmoid,
     "tanh": tf.nn.tanh,
+    "swish": tf.nn.swish,
     "gelu": gelu,
     "gelu_tf": gelu_tf,
     "None": None,

From 3f3449c65dfe6cf9345b3c3418431cef93b8bd7f Mon Sep 17 00:00:00 2001
From: Zhaoming Xia <747247642@qq.com>
Date: Fri, 27 Oct 2023 17:51:25 +0800
Subject: [PATCH 2/3] Add cosine restart learning rate

Add cosine restart learning rate
---
 deepmd/train/trainer.py       |  10 ++
 deepmd/utils/argcheck.py      |  31 +++++-
 deepmd/utils/learning_rate.py | 171 ++++++++++++++++++++++++++++++++++
 3 files changed, 211 insertions(+), 1 deletion(-)

diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 1f7b78045b..d99f657838 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -58,6 +58,8 @@
 )
 from deepmd.utils.learning_rate import (
     LearningRateExp,
+    LearningRateCos,
+    LearningRateCosRestarts,
 )
 from deepmd.utils.sess import (
     run_sess,
@@ -118,6 +120,14 @@ def get_lr_and_coef(lr_param):
                 lr = LearningRateExp(
                     lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
                 )
+            elif self.lr_type == "cos":
+                lr = LearningRateCos(
+                    lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
+                )
+            elif self.lr_type == "cosrestart":
+                lr = LearningRateCosRestarts(
+                    lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
+                )
             else:
                 raise RuntimeError("unknown learning_rate type " + lr_type)
             return lr, scale_lr_coef

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index ae446ef348..52fd462a2a 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1010,13 +1010,42 @@ def learning_rate_exp():
     ]
     return args
 
+def learning_rate_cos():
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = "The desired learning rate at the end of the training."
+    doc_decay_steps = (
+        "Number of steps to decay over."
+    )
+
+    args = [
+        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
+        Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
+        Argument("decay_steps", int, optional=True, default=100000, doc=doc_decay_steps),
+    ]
+    return args
+
+def learning_rate_cosrestarts():
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = "The desired learning rate at the end of the training."
+    doc_decay_steps = (
+        "Number of steps to decay over in the first decay cycle."
+    )
+
+    args = [
+        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
+        Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
+        Argument("decay_steps", int, optional=True, default=10000, doc=doc_decay_steps),
+    ]
+    return args
 
 
 def learning_rate_variant_type_args():
     doc_lr = "The type of the learning rate."
     return Variant(
         "type",
-        [Argument("exp", dict, learning_rate_exp())],
+        [Argument("exp", dict, learning_rate_exp()),
+         Argument("cos", dict, learning_rate_cos()),
+         Argument("cosrestart", dict, learning_rate_cosrestarts())],
         optional=True,
         default_tag="exp",
         doc=doc_lr,

diff --git a/deepmd/utils/learning_rate.py b/deepmd/utils/learning_rate.py
index 5bec5120cd..687dae37d2 100644
--- a/deepmd/utils/learning_rate.py
+++ b/deepmd/utils/learning_rate.py
@@ -105,3 +105,174 @@ def start_lr(self) -> float:
     def value(self, step: int) -> float:
         """Get the lr at a certain step."""
         return self.start_lr_ * np.power(self.decay_rate_, (step // self.decay_steps_))
+
+class LearningRateCos:
+    r"""The cosine decaying learning rate.
+
+    The function returns the decayed learning rate. It is computed as:
+    ```python
+    global_step = min(global_step, decay_steps)
+    cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps))
+    decayed = (1 - alpha) * cosine_decay + alpha
+    decayed_learning_rate = learning_rate * decayed
+    ```
+
+    Parameters
+    ----------
+    start_lr
+        Starting learning rate
+    stop_lr
+        Minimum learning rate at the end of the decay (alpha = stop_lr / start_lr).
+    decay_steps
+        Number of steps to decay over.
+    """
+
+    def __init__(
+        self,
+        start_lr: float,
+        stop_lr: float = 5e-8,
+        decay_steps: int = 100000,
+    ) -> None:
+        """Constructor."""
+        self.cd = {}
+        self.cd["start_lr"] = start_lr
+        self.cd["stop_lr"] = stop_lr
+        self.cd["decay_steps"] = decay_steps
+        self.start_lr_ = self.cd["start_lr"]
+        self.alpha_ = self.cd["stop_lr"] / self.cd["start_lr"]
+
+    def build(
+        self, global_step: tf.Tensor, stop_step: Optional[int] = None
+    ) -> tf.Tensor:
+        """Build the learning rate.
+
+        Parameters
+        ----------
+        global_step
+            The tf Tensor providing the global training step
+        stop_step
+            The stop step.
+
+        Returns
+        -------
+        learning_rate
+            The learning rate
+        """
+        if stop_step is None:
+            self.decay_steps_ = (
+                self.cd["decay_steps"] if self.cd["decay_steps"] is not None else 100000
+            )
+        else:
+            self.stop_lr_ = (
+                self.cd["stop_lr"] if self.cd["stop_lr"] is not None else 5e-8
+            )
+            self.decay_steps_ = (
+                self.cd["decay_steps"]
+                if self.cd["decay_steps"] is not None
+                else stop_step
+            )
+
+        return tf.train.cosine_decay(
+            self.start_lr_,
+            global_step,
+            self.decay_steps_,
+            self.alpha_,
+            name="cosine",
+        )
+
+    def start_lr(self) -> float:
+        """Get the start lr."""
+        return self.start_lr_
+
+    def value(self, step: int) -> float:
+        """Get the lr at a certain step."""
+        step = min(step, self.decay_steps_)
+        cosine_decay = 0.5 * (1 + np.cos(np.pi * step / self.decay_steps_))
+        decayed = (1 - self.alpha_) * cosine_decay + self.alpha_
+        decayed_learning_rate = self.start_lr_ * decayed
+        return decayed_learning_rate
+
+
+class LearningRateCosRestarts:
+    r"""The cosine decaying restart learning rate.
+
+    The function returns the cosine decayed learning rate while taking into account
+    possible warm restarts.
+    See ``tf.train.cosine_decay_restarts`` for the underlying schedule.
+
+    Parameters
+    ----------
+    start_lr
+        Starting learning rate
+    stop_lr
+        Minimum learning rate at the end of each decay cycle (alpha = stop_lr / start_lr).
+    decay_steps
+        Number of steps to decay over in the first decay cycle.
+    """
+
+    def __init__(
+        self,
+        start_lr: float,
+        stop_lr: float = 5e-8,
+        decay_steps: int = 10000,
+    ) -> None:
+        """Constructor."""
+        self.cd = {}
+        self.cd["start_lr"] = start_lr
+        self.cd["stop_lr"] = stop_lr
+        self.cd["decay_steps"] = decay_steps
+        self.start_lr_ = self.cd["start_lr"]
+        self.alpha_ = self.cd["stop_lr"] / self.cd["start_lr"]
+
+    def build(
+        self, global_step: tf.Tensor, stop_step: Optional[int] = None
+    ) -> tf.Tensor:
+        """Build the learning rate.
+
+        Parameters
+        ----------
+        global_step
+            The tf Tensor providing the global training step
+        stop_step
+            The stop step.
+
+        Returns
+        -------
+        learning_rate
+            The learning rate
+        """
+        if stop_step is None:
+            self.decay_steps_ = (
+                self.cd["decay_steps"] if self.cd["decay_steps"] is not None else 10000
+            )
+        else:
+            self.stop_lr_ = (
+                self.cd["stop_lr"] if self.cd["stop_lr"] is not None else 5e-8
+            )
+            self.decay_steps_ = (
+                self.cd["decay_steps"]
+                if self.cd["decay_steps"] is not None
+                else stop_step
+            )
+
+
+
+        return tf.train.cosine_decay_restarts(
+            learning_rate=self.start_lr_,
+            global_step=global_step,
+            first_decay_steps=self.decay_steps_,
+            alpha=self.alpha_,
+            name="cosinerestart",
+        )
+
+    def start_lr(self) -> float:
+        """Get the start lr."""
+        return self.start_lr_
+
+    def value(self, step: int) -> float:
+        """Get the lr at a certain step. TODO: this follows the plain cosine decay and does not yet account for restarts."""
+        step = min(step, self.decay_steps_)
+        cosine_decay = 0.5 * (1 + np.cos(np.pi * step / self.decay_steps_))
+        decayed = (1 - self.alpha_) * cosine_decay + self.alpha_
+        decayed_learning_rate = self.start_lr_ * decayed
+        return decayed_learning_rate

From 05052c195308f61b63ce2bab130ce0e8cba60604 Mon Sep 17 00:00:00 2001
From: Zhaoming Xia <747247642@qq.com>
Date: Fri, 27 Oct 2023 18:14:47 +0800
Subject: [PATCH 3/3] add cosine restart learning rate

---
 deepmd/train/trainer.py | 46 +++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index d99f657838..1d6c0e71fe 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -115,8 +115,8 @@ def get_lr_and_coef(lr_param):
                 scale_lr_coef = np.sqrt(self.run_opt.world_size).real
             else:
                 scale_lr_coef = 1.0
-            lr_type = lr_param.get("type", "exp")
-            if lr_type == "exp":
+            self.lr_type = lr_param.get("type", "exp")
+            if self.lr_type == "exp":
                 lr = LearningRateExp(
                     lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
                 )
@@ -129,7 +129,7 @@ def get_lr_and_coef(lr_param):
                     lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
                 )
             else:
-                raise RuntimeError("unknown learning_rate type " + lr_type)
+                raise RuntimeError("unknown learning_rate type " + self.lr_type)
             return lr, scale_lr_coef
 
         # learning rate
@@ -563,29 +563,31 @@ def train(self, train_data=None, valid_data=None):
         is_first_step = True
         self.cur_batch = cur_batch
         if not self.multi_task_mode:
-            log.info(
-                "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
-                % (
-                    run_sess(self.sess, self.learning_rate),
-                    self.lr.value(cur_batch),
-                    self.lr.decay_steps_,
-                    self.lr.decay_rate_,
-                    self.lr.value(stop_batch),
-                )
-            )
-        else:
-            for fitting_key in self.fitting:
+            if self.lr_type == "exp":
                 log.info(
-                    "%s: start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
+                    "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
                     % (
-                        fitting_key,
-                        run_sess(self.sess, self.learning_rate_dict[fitting_key]),
-                        self.lr_dict[fitting_key].value(cur_batch),
-                        self.lr_dict[fitting_key].decay_steps_,
-                        self.lr_dict[fitting_key].decay_rate_,
-                        self.lr_dict[fitting_key].value(stop_batch),
+                        run_sess(self.sess, self.learning_rate),
+                        self.lr.value(cur_batch),
+                        self.lr.decay_steps_,
+                        self.lr.decay_rate_,
+                        self.lr.value(stop_batch),
                     )
                 )
+        else:
+            for fitting_key in self.fitting:
+                if self.lr_type == "exp":
+                    log.info(
+                        "%s: start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
+                        % (
+                            fitting_key,
+                            run_sess(self.sess, self.learning_rate_dict[fitting_key]),
+                            self.lr_dict[fitting_key].value(cur_batch),
+                            self.lr_dict[fitting_key].decay_steps_,
+                            self.lr_dict[fitting_key].decay_rate_,
+                            self.lr_dict[fitting_key].value(stop_batch),
+                        )
+                    )
 
         prf_options = None
         prf_run_metadata = None
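Reviewer notes on the series above; the sketches below are illustrative only and are not part of the diffs. Patch 1 maps the new "swish" key to tf.nn.swish. A minimal NumPy sketch of the function this computes (x * sigmoid(x), also known as SiLU), handy for sanity-checking outputs:

```python
import numpy as np

def swish(x):
    # swish(x) = x * sigmoid(x); mirrors what tf.nn.swish computes.
    return x * (1.0 / (1.0 + np.exp(-x)))

print(swish(np.array([-2.0, 0.0, 2.0])))  # approx. [-0.238, 0.0, 1.762]
```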
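Patch 2 registers "cos" and "cosrestart" as learning_rate variants, so they would be selected with "type": "cos" or "type": "cosrestart" in the learning_rate section of the training input, together with the start_lr, stop_lr, and decay_steps arguments defined in argcheck.py. A standalone sketch of the schedule that LearningRateCos.value() implements, using the defaults from the patch, for checking the endpoint behavior:

```python
import numpy as np

def cos_lr(step, start_lr=1e-3, stop_lr=1e-8, decay_steps=100000):
    # Plain cosine decay, mirroring LearningRateCos.value():
    # lr(step) = start_lr * ((1 - alpha) * 0.5 * (1 + cos(pi * step / decay_steps)) + alpha)
    alpha = stop_lr / start_lr
    step = min(step, decay_steps)
    cosine_decay = 0.5 * (1 + np.cos(np.pi * step / decay_steps))
    return start_lr * ((1 - alpha) * cosine_decay + alpha)

print(cos_lr(0))       # 1e-3: starts at start_lr
print(cos_lr(50000))   # ~5e-4: halfway down the cosine curve
print(cos_lr(100000))  # 1e-8: clamps at stop_lr once decay_steps is reached
```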
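The value() method of LearningRateCosRestarts is flagged as a TODO because it reproduces the plain cosine decay rather than the restarting schedule that tf.train.cosine_decay_restarts builds into the graph. A possible restart-aware sketch, assuming TensorFlow's default t_mul=2.0 and m_mul=1.0 (an assumption here, since the patch does not override them):

```python
import numpy as np

def cos_restart_lr(step, start_lr=1e-3, stop_lr=1e-8, first_decay_steps=10000,
                   t_mul=2.0, m_mul=1.0):
    # Cosine decay with warm restarts (SGDR). Each cycle decays from roughly
    # m_mul**i * start_lr down toward stop_lr; cycle i is t_mul times longer
    # than cycle i - 1.
    alpha = stop_lr / start_lr
    completed = step / first_decay_steps
    if t_mul == 1.0:
        i_restart = np.floor(completed)
        frac = completed - i_restart
    else:
        i_restart = np.floor(np.log(1.0 - completed * (1.0 - t_mul)) / np.log(t_mul))
        sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
        frac = (completed - sum_r) / t_mul**i_restart
    cosine_decay = 0.5 * m_mul**i_restart * (1.0 + np.cos(np.pi * frac))
    return start_lr * ((1.0 - alpha) * cosine_decay + alpha)

# Decays over the first 10000 steps, restarts, then decays over 20000 steps, etc.
for s in (0, 9999, 10000, 29999, 30000):
    print(s, cos_restart_lr(s))
```

If this matches the intended behavior, the same arithmetic could back the off-graph value() so that the logged and in-graph learning rates agree.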