update tutorial

calico · Dec 24, 2024 · e8ac5aa · e8ac5aa
1 parent 684dc32
commit e8ac5aa
Show file tree

Hide file tree

Showing 21 changed files with 642 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -14,6 +14,8 @@ Baskerville provides researchers with tools to:
 
 Documentation page: https://calico.github.io/baskerville/index.html
 
+[Documentation page for transfer learning](docs/transfer/transfer.md)
+
 ---
 
 ### Installation

diff --git a/docs/transfer/transfer.md b/docs/transfer/transfer.md
@@ -108,12 +108,19 @@ Create *targets.txt*:
 ### Step 4. Create TFRecords
 
 ```bash
+cd baskerville/docs/transfer
+# change data_path in make_tfr.sh
 ./make_tfr.sh
 ```
 
 ### Step 5. Parameter Json File
 
-Similar to Borzoi training, arguments for training learning is specified in the params.json file. Add a additional `transfer` section in the parameter json file to allow transfer learning. For transfer learning rate, we suggest lowering the lr to 1e-5 for full fine-tuning, and keeping the original lr for other methods. For batch size, we suggest a batch size of 1 to reduce GPU memory for linear probing or adapter-based methods. Here's the `transfer` arguments for different transfer methods. You can also find the params.json file for Locon4 in the `params.json`.
+Similar to Borzoi training, arguments for transfer learning are specified in the params.json file. Add an additional `transfer` section in the parameter json file to allow transfer learning. For the transfer learning rate, we suggest lowering the lr to 1e-5 for full fine-tuning, and keeping the original lr for other methods. For batch size, we suggest a batch size of 1 to reduce GPU memory for linear probing or adapter-based methods. Here are the `transfer` arguments for different transfer methods.
+
+Example params.json files for transfer learning of Borzoi-lite are located at: baskerville/tests/data/transfer/json/borzoilite_\*.json
+
+Example params.json files for transfer learning of full Borzoi are located at: baskerville/tests/data/transfer/json/borzoi_\*.json
+
 
 **Full fine-tuning**:
 ```
@@ -166,7 +173,6 @@ Similar to Borzoi training, arguments for training learning is specified in the
         "conv_latent": 16
     },
 ```
-
 ### Step 6. Train model
 
 Run westminster_train_folds.py `--setup` to set up directory structures:
@@ -182,8 +188,14 @@ westminster_train_folds.py \
 Run hound_transfer.py on fold3 data for 4 replicate models:
 
 ```bash
-hound_transfer.py -o train/f0c0/train --restore ${data_path}/weights/borzoi_r0.h5 params.json train/f3c0/data0
-hound_transfer.py -o train/f1c0/train --restore ${data_path}/weights/borzoi_r1.h5 params.json train/f3c0/data0
-hound_transfer.py -o train/f2c0/train --restore ${data_path}/weights/borzoi_r2.h5 params.json train/f3c0/data0
-hound_transfer.py -o train/f3c0/train --restore ${data_path}/weights/borzoi_r3.h5 params.json train/f3c0/data0
+hound_transfer.py -o train_rep0 --restore ${data_path}/weights/borzoi_r0.h5 params.json train/f3c0/data0
+hound_transfer.py -o train_rep1 --restore ${data_path}/weights/borzoi_r1.h5 params.json train/f3c0/data0
+hound_transfer.py -o train_rep2 --restore ${data_path}/weights/borzoi_r2.h5 params.json train/f3c0/data0
+hound_transfer.py -o train_rep3 --restore ${data_path}/weights/borzoi_r3.h5 params.json train/f3c0/data0
 ```
+
+### Step 7. Load models
+
+We apply weight merging for LoRA, IA3, and Locon weights, so there are no architecture changes once the model is trained. You can use the same params.json file and load the train_rep0/model_best.mergeW.h5 weight file.
+
+For houlsby and houlsby_se, the model architecture changes due to the insertion of adapter modules. The new architecture JSON file can be found in train_rep0/params.json.
diff --git a/tests/data/transfer/json/borzoi_full.json b/tests/data/transfer/json/borzoi_full.json
@@ -0,0 +1,85 @@
+{
+    "train": {
+        "batch_size": 1,
+	"shuffle_buffer": 256,
+        "optimizer": "adam",
+	"learning_rate": 0.000006,
+	"loss": "poisson_mn",
+	"total_weight": 0.2,
+        "warmup_steps": 20000,
+        "global_clipnorm": 0.15,
+        "adam_beta1": 0.9,
+        "adam_beta2": 0.999,
+        "patience": 5,
+	"train_epochs_min": 10,
+	"train_epochs_max": 50
+    },
+    "transfer": {
+        "mode": "full"
+    },    
+    "model": {
+        "seq_length": 524288,
+        "augment_rc": true,
+        "augment_shift": 3,
+        "activation": "gelu",
+        "norm_type": "batch-sync",
+        "bn_momentum": 0.9,
+	"kernel_initializer": "lecun_normal",
+	"l2_scale": 2.0e-8,
+        "trunk": [
+            {
+                "name": "conv_dna",
+                "filters": 512,
+                "kernel_size": 15,
+		"norm_type": null,
+		"activation": "linear",
+                "pool_size": 2
+            },
+            {
+                "name": "res_tower",
+                "filters_init": 608,
+		"filters_end": 1536,
+		"divisible_by": 32,
+                "kernel_size": 5,
+		"num_convs": 1,
+                "pool_size": 2,
+                "repeat": 6
+            },
+            {
+                "name": "transformer_tower",
+                "key_size": 64,
+                "heads": 8,
+                "num_position_features": 32,
+                "dropout": 0.2,
+		"mha_l2_scale": 1.0e-8,
+		"l2_scale": 1.0e-8,
+		"kernel_initializer": "he_normal",
+		"repeat": 8
+            },
+	    {
+		"name": "unet_conv",
+		"kernel_size": 3,
+        "upsample_conv": true            
+	    },
+	    {
+		"name": "unet_conv",
+		"kernel_size": 3,
+        "upsample_conv": true            
+	    },
+            {
+                "name": "Cropping1D",
+                "cropping": 5120
+            },
+            {
+                "name": "conv_nac",
+                "filters": 1920,
+                "dropout": 0.1
+            }
+        ],
+	"head_human": {
+            "name": "final",
+            "units": 4,
+            "activation": "softplus"
+        }
+    }
+}
diff --git a/tests/data/transfer/json/borzoi_houlsby.json b/tests/data/transfer/json/borzoi_houlsby.json
@@ -0,0 +1,87 @@
+{
+    "train": {
+        "batch_size": 1,
+	"shuffle_buffer": 256,
+        "optimizer": "adam",
+	"learning_rate": 0.00006,
+	"loss": "poisson_mn",
+	"total_weight": 0.2,
+        "warmup_steps": 20000,
+        "global_clipnorm": 0.15,
+        "adam_beta1": 0.9,
+        "adam_beta2": 0.999,
+        "patience": 5,
+	"train_epochs_min": 10,
+	"train_epochs_max": 50
+    },
+    "transfer": {
+        "mode": "adapter",
+        "adapter": "houlsby",
+        "adapter_latent": 8
+    },    
+    "model": {
+        "seq_length": 524288,
+        "augment_rc": true,
+        "augment_shift": 3,
+        "activation": "gelu",
+        "norm_type": "batch-sync",
+        "bn_momentum": 0.9,
+	"kernel_initializer": "lecun_normal",
+	"l2_scale": 2.0e-8,
+        "trunk": [
+            {
+                "name": "conv_dna",
+                "filters": 512,
+                "kernel_size": 15,
+		"norm_type": null,
+		"activation": "linear",
+                "pool_size": 2
+            },
+            {
+                "name": "res_tower",
+                "filters_init": 608,
+		"filters_end": 1536,
+		"divisible_by": 32,
+                "kernel_size": 5,
+		"num_convs": 1,
+                "pool_size": 2,
+                "repeat": 6
+            },
+            {
+                "name": "transformer_tower",
+                "key_size": 64,
+                "heads": 8,
+                "num_position_features": 32,
+                "dropout": 0.2,
+		"mha_l2_scale": 1.0e-8,
+		"l2_scale": 1.0e-8,
+		"kernel_initializer": "he_normal",
+		"repeat": 8
+            },
+	    {
+		"name": "unet_conv",
+		"kernel_size": 3,
+        "upsample_conv": true            
+	    },
+	    {
+		"name": "unet_conv",
+		"kernel_size": 3,
+        "upsample_conv": true            
+	    },
+            {
+                "name": "Cropping1D",
+                "cropping": 5120
+            },
+            {
+                "name": "conv_nac",
+                "filters": 1920,
+                "dropout": 0.1
+            }
+        ],
+	"head_human": {
+            "name": "final",
+            "units": 4,
+            "activation": "softplus"
+        }
+    }
+}
diff --git a/tests/data/transfer/json/borzoi_ia3.json b/tests/data/transfer/json/borzoi_ia3.json
@@ -0,0 +1,86 @@
+{
+    "train": {
+        "batch_size": 1,
+	"shuffle_buffer": 256,
+        "optimizer": "adam",
+	"learning_rate": 0.00006,
+	"loss": "poisson_mn",
+	"total_weight": 0.2,
+        "warmup_steps": 20000,
+        "global_clipnorm": 0.15,
+        "adam_beta1": 0.9,
+        "adam_beta2": 0.999,
+        "patience": 5,
+	"train_epochs_min": 10,
+	"train_epochs_max": 50
+    },
+    "transfer": {
+        "mode": "adapter",
+        "adapter": "ia3"
+    },    
+    "model": {
+        "seq_length": 524288,
+        "augment_rc": true,
+        "augment_shift": 3,
+        "activation": "gelu",
+        "norm_type": "batch-sync",
+        "bn_momentum": 0.9,
+	"kernel_initializer": "lecun_normal",
+	"l2_scale": 2.0e-8,
+        "trunk": [
+            {
+                "name": "conv_dna",
+                "filters": 512,
+                "kernel_size": 15,
+		"norm_type": null,
+		"activation": "linear",
+                "pool_size": 2
+            },
+            {
+                "name": "res_tower",
+                "filters_init": 608,
+		"filters_end": 1536,
+		"divisible_by": 32,
+                "kernel_size": 5,
+		"num_convs": 1,
+                "pool_size": 2,
+                "repeat": 6
+            },
+            {
+                "name": "transformer_tower",
+                "key_size": 64,
+                "heads": 8,
+                "num_position_features": 32,
+                "dropout": 0.2,
+		"mha_l2_scale": 1.0e-8,
+		"l2_scale": 1.0e-8,
+		"kernel_initializer": "he_normal",
+		"repeat": 8
+            },
+	    {
+		"name": "unet_conv",
+		"kernel_size": 3,
+        "upsample_conv": true            
+	    },
+	    {
+		"name": "unet_conv",
+		"kernel_size": 3,
+        "upsample_conv": true            
+	    },
+            {
+                "name": "Cropping1D",
+                "cropping": 5120
+            },
+            {
+                "name": "conv_nac",
+                "filters": 1920,
+                "dropout": 0.1
+            }
+        ],
+	"head_human": {
+            "name": "final",
+            "units": 4,
+            "activation": "softplus"
+        }
+    }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,8 @@ Baskerville provides researchers with tools to: @@
     Documentation page: https://calico.github.io/baskerville/index.html
+    [Documentation page for transfer learning](docs/transfer/transfer.md)
     ---
     ### Installation
@@ Expand Down @@