-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrl_layers.go
149 lines (111 loc) · 3.94 KB
/
rl_layers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// Copyright (c) 2020, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package axon
import (
"cogentcore.org/core/math32/minmax"
)
//gosl:start rl_layers
// RWPredParams parameterizes reward prediction for a simple Rescorla-Wagner
// learning dynamic (i.e., PV learning in the Rubicon framework).
type RWPredParams struct {
// range of predictions that can be represented; Defaults initializes it with Set(0.01, 0.99) -- having a truncated range preserves some sensitivity in dopamine at the extremes of good or poor performance
PredRange minmax.F32
}
// Defaults initializes PredRange by calling Set(0.01, 0.99) on it
// (presumably minimum 0.01 and maximum 0.99 -- confirm against minmax.F32).
func (rp *RWPredParams) Defaults() {
rp.PredRange.Set(0.01, 0.99)
}
// Update currently does nothing: its body is empty. Every parameter struct
// in this file pairs a Defaults method with an Update method.
func (rp *RWPredParams) Update() {
}
// RWDaParams computes a dopamine (DA) signal using simple Rescorla-Wagner
// learning dynamic (i.e., PV learning in the Rubicon framework).
type RWDaParams struct {
// tonic baseline Ge level for DA = 0 -- GeFromDA scales it by (1 + DA), so for DA in -1..1 the result lies between 0 and 2*TonicGe -- just for spiking display of computed DA value
TonicGe float32
// index of the RWPredLayer to get the reward prediction from -- set by RWDaPostBuild using the BuildConfig key RWPredLayName
RWPredLayIndex int32 `edit:"-"`
// unused padding: together with the two 4-byte fields above it brings the struct to 16 bytes -- presumably for GPU memory alignment (confirm)
pad, pad1 uint32
}
// Defaults sets TonicGe to 0.2. RWPredLayIndex is not touched here; it is
// filled in by RWDaPostBuild.
func (rp *RWDaParams) Defaults() {
rp.TonicGe = 0.2
}
// Update currently does nothing: its body is empty. Every parameter struct
// in this file pairs a Defaults method with an Update method.
func (rp *RWDaParams) Update() {
}
// GeFromDA converts the dopamine value da into an excitatory conductance:
// TonicGe scaled by (1 + da), with any negative result floored at zero.
func (rp *RWDaParams) GeFromDA(da float32) float32 {
	scaled := (da + 1.0) * rp.TonicGe
	if scaled < 0 {
		// conductance is not allowed to go below zero
		return 0
	}
	return scaled
}
// TDIntegParams are params for reward integrator layer
type TDIntegParams struct {
// discount factor -- how much to discount the future prediction from TDPred; Defaults sets it to 0.9
Discount float32
// gain factor on TD reward prediction activations; Defaults sets it to 1
PredGain float32
// index of the TDPredLayer to get the reward prediction from -- set by TDIntegPostBuild using the BuildConfig key TDPredLayName
TDPredLayIndex int32 `edit:"-"`
// unused padding: together with the three 4-byte fields above it brings the struct to 16 bytes -- presumably for GPU memory alignment (confirm)
pad uint32
}
// Defaults sets the prediction gain to 1 and the discount factor to 0.9.
// TDPredLayIndex is left untouched.
func (tp *TDIntegParams) Defaults() {
	tp.PredGain = 1.0
	tp.Discount = 0.9
}
// Update currently does nothing: its body is empty. Every parameter struct
// in this file pairs a Defaults method with an Update method.
func (tp *TDIntegParams) Update() {
}
// TDDaParams are params for dopamine (DA) signal as the temporal difference (TD)
// between the TDIntegLayer activations in the minus and plus phase.
type TDDaParams struct {
// tonic baseline Ge level for DA = 0 -- GeFromDA scales it by (1 + DA), so for DA in -1..1 the result lies between 0 and 2*TonicGe -- just for spiking display of computed DA value
TonicGe float32
// index of the TDIntegLayer to get the reward prediction from -- set by TDDaPostBuild using the BuildConfig key TDIntegLayName
TDIntegLayIndex int32 `edit:"-"`
// unused padding: together with the two 4-byte fields above it brings the struct to 16 bytes -- presumably for GPU memory alignment (confirm)
pad, pad1 uint32
}
// Defaults sets TonicGe to 0.3. TDIntegLayIndex is not touched here; it is
// filled in by TDDaPostBuild.
func (tp *TDDaParams) Defaults() {
tp.TonicGe = 0.3
}
// Update currently does nothing: its body is empty. Every parameter struct
// in this file pairs a Defaults method with an Update method.
func (tp *TDDaParams) Update() {
}
// GeFromDA returns the excitatory conductance for the dopamine value da:
// TonicGe * (1 + da), floored at zero. A conductance cannot be negative, so
// values of da below -1 yield 0 -- the same clamp that RWDaParams.GeFromDA
// applies to the identical formula.
func (tp *TDDaParams) GeFromDA(da float32) float32 {
	ge := tp.TonicGe * (1.0 + da)
	if ge < 0 {
		ge = 0
	}
	return ge
}
//gosl:end rl_layers
// note: Defaults not called on GPU

// RWDefaults sets Inhib.ActAvg.Nominal to 0.5. Its callers are outside this
// file; presumably it is applied to the Rescorla-Wagner (RW) layer types.
func (ly *LayerParams) RWDefaults() {
	ly.Inhib.ActAvg.Nominal = 0.5
}
// RWPredDefaults sets Acts.Dt.GeTau to 40 and both Acts.Decay.Act and
// Acts.Decay.Glong to 1. Its callers are outside this file; presumably it is
// applied to the RWPred layer type.
func (ly *LayerParams) RWPredDefaults() {
	ly.Acts.Dt.GeTau = 40

	// both decay settings are set to 1
	ly.Acts.Decay.Glong = 1
	ly.Acts.Decay.Act = 1
}
// RWDaPostBuild does post-Build config: it looks up the layer registered
// under the BuildConfig key "RWPredLayName" and stores the returned index in
// Params.RWDa.RWPredLayIndex. The true argument presumably marks the name as
// required (the LDT lookups pass false and are commented as optional).
func (ly *Layer) RWDaPostBuild() {
	predIndex := ly.BuildConfigFindLayer("RWPredLayName", true)
	ly.Params.RWDa.RWPredLayIndex = predIndex
}
// TDDefaults sets Inhib.ActAvg.Nominal to 0.5. Its callers are outside this
// file; presumably it is applied to the temporal-difference (TD) layer types.
func (ly *LayerParams) TDDefaults() {
	ly.Inhib.ActAvg.Nominal = 0.5
}
// TDPredDefaults sets Acts.Dt.GeTau to 40 and both Acts.Decay.Act and
// Acts.Decay.Glong to 1. Its callers are outside this file; presumably it is
// applied to the TDPred layer type.
func (ly *LayerParams) TDPredDefaults() {
	ly.Acts.Dt.GeTau = 40

	// both decay settings are set to 1
	ly.Acts.Decay.Glong = 1
	ly.Acts.Decay.Act = 1
}
// LDTPostBuild does post-Build config: it looks up the layers registered
// under the BuildConfig keys "SrcLay1Name" through "SrcLay4Name", in that
// order, and stores the returned indexes in Params.LDT.SrcLay1Index through
// SrcLay4Index. All four source layers are optional (false is passed to
// BuildConfigFindLayer).
func (ly *Layer) LDTPostBuild() {
	src1 := ly.BuildConfigFindLayer("SrcLay1Name", false)
	src2 := ly.BuildConfigFindLayer("SrcLay2Name", false)
	src3 := ly.BuildConfigFindLayer("SrcLay3Name", false)
	src4 := ly.BuildConfigFindLayer("SrcLay4Name", false)

	ly.Params.LDT.SrcLay1Index = src1
	ly.Params.LDT.SrcLay2Index = src2
	ly.Params.LDT.SrcLay3Index = src3
	ly.Params.LDT.SrcLay4Index = src4
}
// TDIntegPostBuild does post-Build config: it looks up the layer registered
// under the BuildConfig key "TDPredLayName" and stores the returned index in
// Params.TDInteg.TDPredLayIndex. The true argument presumably marks the name
// as required (the LDT lookups pass false and are commented as optional).
func (ly *Layer) TDIntegPostBuild() {
	predIndex := ly.BuildConfigFindLayer("TDPredLayName", true)
	ly.Params.TDInteg.TDPredLayIndex = predIndex
}
// TDDaPostBuild does post-Build config: it looks up the layer registered
// under the BuildConfig key "TDIntegLayName" and stores the returned index in
// Params.TDDa.TDIntegLayIndex. The true argument presumably marks the name as
// required (the LDT lookups pass false and are commented as optional).
func (ly *Layer) TDDaPostBuild() {
	integIndex := ly.BuildConfigFindLayer("TDIntegLayName", true)
	ly.Params.TDDa.TDIntegLayIndex = integIndex
}