@@ -64,10 +64,10 @@ public class Adam<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         let stepSize = learningRate * (sqrt(1 - pow(beta2, step)) / (1 - pow(beta1, step)))
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self) {
             firstMoments[keyPath: kp] =
-                firstMoments[keyPath: kp] * beta1 + (1 - beta1) * gradient[keyPath: kp]
+                firstMoments[keyPath: kp] * beta1 + (1 - beta1) * vector[keyPath: kp]
             secondMoments[keyPath: kp] =
                 secondMoments[keyPath: kp] * beta2 + (1 - beta2) *
-                gradient[keyPath: kp] * gradient[keyPath: kp]
+                vector[keyPath: kp] * vector[keyPath: kp]
             model[keyPath: kp] -=
                 stepSize * firstMoments[keyPath: kp] / (sqrt(secondMoments[keyPath: kp]) + epsilon)
         }
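
Note: the hunk above is the bias-corrected Adam update (Kingma & Ba, 2015) in the form that folds both correction terms into the step size; after the rename, `vector` plays the role of the gradient g_t, while `firstMoments` and `secondMoments` are m_t and v_t. A sketch of the math, with `stepSize` expanded:

```latex
\begin{align*}
m_t &= \beta_1\, m_{t-1} + (1 - \beta_1)\, g_t \\
v_t &= \beta_2\, v_{t-1} + (1 - \beta_2)\, g_t^2 \\
\theta_t &= \theta_{t-1} - \alpha\,\frac{\sqrt{1 - \beta_2^{\,t}}}{1 - \beta_1^{\,t}} \cdot \frac{m_t}{\sqrt{v_t} + \epsilon}
\end{align*}
```
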
@@ -106,9 +106,9 @@ public class RMSProp<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
         let learningRate = self.learningRate * 1 / (1 + decay * step)
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self) {
             alpha[keyPath: kp] =
-                rho * alpha[keyPath: kp] + (1 - rho) * pow(gradient[keyPath: kp], 2)
+                rho * alpha[keyPath: kp] + (1 - rho) * pow(vector[keyPath: kp], 2)
             model[keyPath: kp] -=
-                learningRate * gradient[keyPath: kp] / (sqrt(alpha[keyPath: kp]) + epsilon)
+                learningRate * vector[keyPath: kp] / (sqrt(alpha[keyPath: kp]) + epsilon)
         }
     }
 }
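
Likewise, this hunk is the standard RMSProp rule with the `1 / (1 + decay * step)` learning-rate decay; `alpha` is the running average of squared gradients. A sketch, writing eta for the base learning rate and lambda for `decay`:

```latex
\begin{align*}
a_t &= \rho\, a_{t-1} + (1 - \rho)\, g_t^2 \\
\theta_t &= \theta_{t-1} - \frac{\eta}{1 + \lambda t} \cdot \frac{g_t}{\sqrt{a_t} + \epsilon}
\end{align*}
```
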
@@ -140,15 +140,15 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     private var velocity = Model.AllDifferentiableVariables.zero
 
     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along vectors: Model.CotangentVector) {
+                       along vector: Model.CotangentVector) {
         step += 1
         let learningRate = self.learningRate * 1 / (1 + decay * step)
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self) {
             velocity[keyPath: kp] =
-                momentum * velocity[keyPath: kp] - learningRate * gradients[keyPath: kp]
+                momentum * velocity[keyPath: kp] - learningRate * vector[keyPath: kp]
             if nesterov {
                 model[keyPath: kp] +=
-                    momentum * velocity[keyPath: kp] - learningRate * gradients[keyPath: kp]
+                    momentum * velocity[keyPath: kp] - learningRate * vector[keyPath: kp]
             } else {
                 model[keyPath: kp] += velocity[keyPath: kp]
             }
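
The SGD hunk keeps a single velocity buffer and applies either classical momentum or a Nesterov-style lookahead, depending on the `nesterov` flag. With mu = `momentum` and eta_t the decayed learning rate, the loop computes:

```latex
\begin{align*}
v_t &= \mu\, v_{t-1} - \eta_t\, g_t \\
\theta_t &= \theta_{t-1} + v_t && \text{(classical momentum)} \\
\theta_t &= \theta_{t-1} + \mu\, v_t - \eta_t\, g_t && \text{(nesterov == true)}
\end{align*}
```
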
@@ -168,6 +168,6 @@ public class RiemannSGD<Model: Layer, Scalar: FloatingPoint>: Optimizer
 
     public func update(_ model: inout Model.AllDifferentiableVariables,
                        along vector: Model.CotangentVector) {
-        model = model.moved(along: learningRate * (.zero - model.tangentVector(from: gradient)))
+        model = model.moved(along: learningRate * (.zero - model.tangentVector(from: vector)))
     }
 }
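
RiemannSGD simply moves the model along minus the learning rate times the tangent vector recovered from `vector`. Across all four optimizers, the renamed parameter is the cotangent vector produced by differentiation. A minimal sketch of the call pattern, assuming the early Swift for TensorFlow toolchain this commit targets; the model, data, loss, and layer-application API below are hypothetical stand-ins and varied across toolchain versions:

```swift
import TensorFlow

// Hypothetical model and data; only the optimizer call pattern is the point.
var model = MyModel()
let optimizer = SGD<MyModel, Float>(learningRate: 0.01, momentum: 0.9)

for (x, y) in batches {
    // Differentiation produces a Model.CotangentVector -- the `vector`
    // argument that update(_:along:) consumes in the hunks above.
    let grads = gradient(at: model) { model -> Tensor<Float> in
        let predicted = model.applied(to: x)  // era-specific API (assumption)
        return (predicted - y).squared().mean()
    }
    optimizer.update(&model.allDifferentiableVariables, along: grads)
}
```
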