-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtrain_encoder_script.py
91 lines (71 loc) · 3.18 KB
/
train_encoder_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from lib.restartable_pendulum import RestartablePendulumEnv
from lib.state_rep import train_encoder
import numpy as np
from matplotlib import pyplot as plt
import itertools
import sys
def main():
    """Train one pendulum state-representation encoder chosen by ``--job=N``.

    The 1-based job number ``N`` selects one (layer widths, trajectory
    length) pair from the Cartesian product of ``archs`` and ``traj_lens``.
    If ``--job=`` is given more than once, the last occurrence wins.
    The selected configuration is trained with ``train_encoder``, after which
    the projectors, weights, biases, the training parameters and a plot of
    the losses are written to paths prefixed with ``save_dir``.

    Raises:
        SystemExit: if ``--job=N`` is missing, is not an integer, or lies
            outside ``1..len(archs) * len(traj_lens)``.
    """
    import os  # local import: only needed to create the output directory

    # --- command line -----------------------------------------------------
    job_flag = '--job='
    job_iter = None
    for arg in sys.argv:
        if arg.startswith(job_flag):
            raw_job = arg[len(job_flag):]
            try:
                job_iter = int(raw_job) - 1  # job numbers are 1-based
            except ValueError:
                sys.exit("invalid --job value {!r}: expected an integer".format(raw_job))
    if job_iter is None:
        # Previously this surfaced later as an UnboundLocalError.
        sys.exit("missing required argument --job=N (1-based job index)")

    # --- training details to choose from ----------------------------------
    archs = [[64], [64, 64], [64, 64, 64], [128], [128, 128], [128, 128, 128], [300], [300, 300]]
    traj_lens = [5, 10, 20]
    param_lists = [archs, traj_lens]
    traj_type = "drive"

    combos = list(itertools.product(*param_lists))
    # Reject job 0 / negative jobs explicitly: a negative index would
    # otherwise silently wrap around and pick a configuration from the end.
    if not 0 <= job_iter < len(combos):
        sys.exit("--job must be between 1 and {} (got {})".format(len(combos), job_iter + 1))
    i = job_iter
    chosen_widths, chosen_traj_len = combos[i]

    # Insertion order matters: it is the line order of train_params.txt.
    parameters = {
        "n_episodes": 3*20000,
        "n_passes": 1,
        "batch_size": 100,
        "learning_rate": 1e-3,
        "widths": chosen_widths,
        "traj_len": chosen_traj_len,
    }
    widths = parameters["widths"]
    traj_len = parameters["traj_len"]
    n_episodes = parameters["n_episodes"]
    n_passes = parameters["n_passes"]
    batch_size = parameters["batch_size"]
    learning_rate = parameters["learning_rate"]

    # NOTE(review): the +32 offset presumably continues the numbering of
    # earlier experiment runs - confirm before changing it.
    save_dir = "./experiments/state_rep_params/pendulum/{}".format(i+32)
    # NOTE(review): save_dir has no trailing separator, so the files below are
    # named e.g. ".../pendulum/32projectors.npz". Kept as-is because
    # train_encoder receives the same prefix; confirm whether a per-run
    # directory was intended.
    # Make sure the parent directory exists so a finished run can be saved.
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    # --- environment information -------------------------------------------
    env = RestartablePendulumEnv()
    state_dim = 3
    act_dim = 1

    init_projectors = None
    init_weights = None
    init_biases = None

    # generate the seeds for the training trajectories: each start state is a
    # pair drawn uniformly from [-pi, pi) x [-8, 8) (presumably angle and
    # angular velocity - confirm against RestartablePendulumEnv), and each
    # start action is a length-1 array drawn uniformly from [-2, 2).
    start_states = [np.array([(np.random.rand(1)[0]*2 - 1)*np.pi, (np.random.rand(1)[0]*2 - 1)*8])
                    for _ in range(n_episodes)]
    start_actions = [np.random.rand(1)*4-2 for _ in range(n_episodes)]

    projectors, weights, biases, losses = train_encoder(
        env, start_states, start_actions, traj_len, n_passes,
        state_dim, act_dim, widths,
        traj_type=traj_type,
        learning_rate=learning_rate,
        init_projectors=init_projectors,
        init_weights=init_weights,
        init_biases=init_biases,
        batch_size=batch_size,
        save_dir=save_dir,
        show_progress=False,
        track_loss_every=int(n_episodes/(batch_size*200)))

    # save the representation weights
    np.savez(save_dir + "projectors.npz", *projectors)
    np.savez(save_dir + "weights.npz", *weights)
    np.savez(save_dir + "biases.npz", *biases)

    # save the training params, one "name value" pair per line
    with open(save_dir + "train_params.txt", "w") as f:
        for name, value in parameters.items():
            f.write("{} {}\n".format(name, value))

    # save the loss curve and reset the current figure
    plt.plot(losses)
    plt.savefig(save_dir + "losses.png")
    plt.clf()
# Run the training job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()