Workaround for SR-10697. (#123)

dan-zheng · web-flow · commit 5883af609ff2 · 2019-05-16T15:40:13.000-07:00
Add a `State` wrapper struct to `SimpleRNNCell` to work around an IRGen crash (SR-10697). TF-507 tracks reverting this change once SR-10697 is fixed.
diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
@@ -1299,11 +1299,19 @@ public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorNum
         return TensorShape([1, weight.shape[1]])
     }
 
-    public var zeroState: Tensor<Scalar> {
-        return Tensor(zeros: stateShape)
+    public var zeroState: State {
+        return State(Tensor(zeros: stateShape))
+    }
+
+    // TODO(TF-507): Revert to `typealias State = Tensor<Scalar>` after
+    // SR-10697 is fixed.
+    public struct State: Differentiable {
+        public let value: Tensor<Scalar>
+        public init(_ value: Tensor<Scalar>) {
+            self.value = value
+        }
     }
 
-    public typealias State = Tensor<Scalar>
     public typealias TimeStepInput = Tensor<Scalar>
     public typealias TimeStepOutput = State
     public typealias Input = RNNCellInput<TimeStepInput, State>
@@ -1319,8 +1327,7 @@ public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorNum
                 seed: (Int64, Int64) = (Int64.random(in: Int64.min..<Int64.max),
                                         Int64.random(in: Int64.min..<Int64.max))) {
         let concatenatedInputSize = inputSize + hiddenSize
-        self.weight = Tensor(glorotUniform: [concatenatedInputSize, hiddenSize],
-                             seed: seed)
+        self.weight = Tensor(glorotUniform: [concatenatedInputSize, hiddenSize], seed: seed)
         self.bias = Tensor(zeros: [hiddenSize])
     }
 
@@ -1330,8 +1337,8 @@ public struct SimpleRNNCell<Scalar: TensorFlowFloatingPoint>: RNNCell, VectorNum
     /// - Returns: The hidden state.
     @differentiable
     public func call(_ input: Input) -> Output {
-        let concatenatedInput = input.input.concatenated(with: input.state, alongAxis: 1)
-        let newState = tanh(matmul(concatenatedInput, weight) + bias)
+        let concatenatedInput = input.input.concatenated(with: input.state.value, alongAxis: 1)
+        let newState = State(tanh(matmul(concatenatedInput, weight) + bias))
         return Output(output: newState, state: newState)
     }
 }