Skip to content
This repository was archived by the owner on Jul 1, 2023. It is now read-only.

Add support for dilated Conv1D and Conv2D #275

Merged
merged 4 commits into from
Jun 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 43 additions & 13 deletions Sources/TensorFlow/Layers/Convolutional.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ public struct Conv1D<Scalar: TensorFlowFloatingPoint>: Layer {
@noDerivative public let stride: Int
/// The padding algorithm for convolution.
@noDerivative public let padding: Padding

/// The dilation factor for the temporal dimension.
@noDerivative public let dilation: Int

/// Creates a `Conv1D` layer with the specified filter, bias, activation function, stride, and
/// padding.
///
Expand All @@ -40,18 +42,21 @@ public struct Conv1D<Scalar: TensorFlowFloatingPoint>: Layer {
/// - activation: The element-wise activation function.
/// - stride: The stride of the sliding window for temporal dimension.
/// - padding: The padding algorithm for convolution.
/// - dilation: The dilation factor for temporal dimension.
public init(
    filter: Tensor<Scalar>,
    bias: Tensor<Scalar>,
    activation: @escaping Activation,
    stride: Int,
    padding: Padding,
    dilation: Int = 1
) {
    // `dilation` defaults to 1 (no dilation) so existing five-argument
    // call sites keep compiling unchanged.
    self.filter = filter
    self.bias = bias
    self.activation = activation
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
}

/// Returns the output obtained from applying the layer to the given input.
Expand All @@ -60,8 +65,12 @@ public struct Conv1D<Scalar: TensorFlowFloatingPoint>: Layer {
/// - Returns: The output `[batchCount, newWidth, outputChannels]`.
@differentiable
public func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
    // 1-D convolution is implemented as a 2-D convolution over a
    // height-1 image: expand a unit dimension at axis 1, convolve, then
    // squeeze it back out before applying bias and activation.
    let conv = conv2D(
        input.expandingShape(at: 1),
        filter: filter.expandingShape(at: 0),
        strides: (1, 1, stride, 1),
        padding: padding,
        dilations: (1, 1, dilation, 1))
    return activation(conv.squeezingShape(at: 1) + bias)
}
}
Expand All @@ -76,15 +85,17 @@ public extension Conv1D where Scalar.RawSignificand: FixedWidthInteger {
/// `[width, inputChannels, outputChannels]`.
/// - stride: The stride of the sliding window for temporal dimension.
/// - padding: The padding algorithm for convolution.
/// - dilation: The dilation factor for temporal dimension.
/// - activation: The element-wise activation function.
/// - generator: The random number generator for initialization.
///
/// - Note: Use `init(filterShape:stride:padding:activation:seed:)` for faster random
/// - Note: Use `init(filterShape:stride:padding:dilation:activation:seed:)` for faster random
/// initialization.
init<G: RandomNumberGenerator>(
filterShape: (Int, Int, Int),
stride: Int = 1,
padding: Padding = .valid,
dilation: Int = 1,
activation: @escaping Activation = identity,
generator: inout G
) {
Expand All @@ -95,7 +106,8 @@ public extension Conv1D where Scalar.RawSignificand: FixedWidthInteger {
bias: Tensor(zeros: [filterShape.2]),
activation: activation,
stride: stride,
padding: padding)
padding: padding,
dilation: dilation)
}
}

Expand All @@ -109,12 +121,14 @@ public extension Conv1D {
/// `[width, inputChannels, outputChannels]`.
/// - stride: The stride of the sliding window for temporal dimension.
/// - padding: The padding algorithm for convolution.
/// - dilation: The dilation factor for the temporal dimension.
/// - activation: The element-wise activation function.
/// - seed: The random seed for initialization. The default value is random.
init(
filterShape: (Int, Int, Int),
stride: Int = 1,
padding: Padding = .valid,
dilation: Int = 1,
activation: @escaping Activation = identity,
seed: (Int32, Int32) = (Int32.random(in: Int32.min..<Int32.max),
Int32.random(in: Int32.min..<Int32.max))
Expand All @@ -126,7 +140,8 @@ public extension Conv1D {
bias: Tensor(zeros: [filterShape.2]),
activation: activation,
stride: stride,
padding: padding)
padding: padding,
dilation: dilation)
}
}

Expand All @@ -148,7 +163,9 @@ public struct Conv2D<Scalar: TensorFlowFloatingPoint>: Layer {
@noDerivative public let strides: (Int, Int)
/// The padding algorithm for convolution.
@noDerivative public let padding: Padding

/// The dilation factor for spatial dimensions.
@noDerivative public let dilations: (Int, Int)

/// Creates a `Conv2D` layer with the specified filter, bias, activation function, strides, and
/// padding.
///
Expand All @@ -158,18 +175,21 @@ public struct Conv2D<Scalar: TensorFlowFloatingPoint>: Layer {
/// - activation: The element-wise activation function.
/// - strides: The strides of the sliding window for spatial dimensions.
/// - padding: The padding algorithm for convolution.
/// - dilations: The dilation factor for spatial dimensions.
public init(
    filter: Tensor<Scalar>,
    bias: Tensor<Scalar>,
    activation: @escaping Activation,
    strides: (Int, Int),
    padding: Padding,
    dilations: (Int, Int) = (1, 1)
) {
    // `dilations` defaults to (1, 1) (no dilation) so existing
    // five-argument call sites keep compiling unchanged.
    self.filter = filter
    self.bias = bias
    self.activation = activation
    self.strides = strides
    self.padding = padding
    self.dilations = dilations
}

/// Returns the output obtained from applying the layer to the given input.
Expand All @@ -178,8 +198,12 @@ public struct Conv2D<Scalar: TensorFlowFloatingPoint>: Layer {
/// - Returns: The output.
@differentiable
public func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
    // Batch and channel dimensions are neither strided nor dilated;
    // only the two spatial dimensions take the configured factors.
    return activation(conv2D(
        input,
        filter: filter,
        strides: (1, strides.0, strides.1, 1),
        padding: padding,
        dilations: (1, dilations.0, dilations.1, 1)) + bias)
}
}

Expand All @@ -192,6 +216,7 @@ public extension Conv2D {
/// - filterShape: The shape of the 4-D convolution kernel.
/// - strides: The strides of the sliding window for spatial dimensions.
/// - padding: The padding algorithm for convolution.
/// - dilations: The dilation factor for spatial dimensions.
/// - activation: The element-wise activation function.
/// - generator: The random number generator for initialization.
///
Expand All @@ -201,6 +226,7 @@ public extension Conv2D {
filterShape: (Int, Int, Int, Int),
strides: (Int, Int) = (1, 1),
padding: Padding = .valid,
dilations: (Int, Int) = (1, 1),
activation: @escaping Activation = identity,
generator: inout G
) {
Expand All @@ -211,7 +237,8 @@ public extension Conv2D {
bias: Tensor(zeros: [filterShape.3]),
activation: activation,
strides: strides,
padding: padding)
padding: padding,
dilations: dilations)
}
}

Expand All @@ -224,12 +251,14 @@ public extension Conv2D {
/// - filterShape: The shape of the 4-D convolution kernel.
/// - strides: The strides of the sliding window for spatial dimensions.
/// - padding: The padding algorithm for convolution.
/// - dilations: The dilation factor for spatial dimensions.
/// - activation: The element-wise activation function.
/// - seed: The random seed for initialization. The default value is random.
init(
filterShape: (Int, Int, Int, Int),
strides: (Int, Int) = (1, 1),
padding: Padding = .valid,
dilations: (Int, Int) = (1, 1),
activation: @escaping Activation = identity,
seed: (Int32, Int32) = (Int32.random(in: Int32.min..<Int32.max),
Int32.random(in: Int32.min..<Int32.max))
Expand All @@ -241,7 +270,8 @@ public extension Conv2D {
bias: Tensor(zeros: [filterShape.3]),
activation: activation,
strides: strides,
padding: padding)
padding: padding,
dilations: dilations)
}
}

Expand Down
57 changes: 34 additions & 23 deletions Sources/TensorFlow/Operators/NN.swift
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,17 @@ func conv2DBackpropInput<Scalar: TensorFlowFloatingPoint>(
shape: Tensor<Int32>,
filter: Tensor<Scalar>,
strides: (Int, Int, Int, Int),
padding: Padding
padding: Padding,
dilations: (Int, Int, Int, Int) = (1, 1, 1, 1)
) -> Tensor<Scalar> {
return Raw.conv2DBackpropInput(
inputSizes: shape,
filter: filter,
outBackprop: x,
strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)],
padding: padding.raw2,
explicitPaddings: [])
explicitPaddings: [],
dilations: [Int32(dilations.0), Int32(dilations.1), Int32(dilations.2), Int32(dilations.3)])
}

/// TensorFlow builtin conv2d gradient helper for the filter.
Expand All @@ -135,15 +137,17 @@ func conv2DBackpropFilter<Scalar: TensorFlowFloatingPoint>(
input: Tensor<Scalar>,
filterSizes: Tensor<Int32>,
strides: (Int, Int, Int, Int),
padding: Padding
padding: Padding,
dilations: (Int, Int, Int, Int)
) -> Tensor<Scalar> {
return Raw.conv2DBackpropFilter(
input,
filterSizes: filterSizes,
outBackprop: x,
strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)],
padding: padding.raw2,
explicitPaddings: [])
explicitPaddings: [],
dilations: [Int32(dilations.0), Int32(dilations.1), Int32(dilations.2), Int32(dilations.3)])
}

@usableFromInline
Expand All @@ -152,13 +156,15 @@ func _vjpConv2DBackpropInput<Scalar: TensorFlowFloatingPoint>(
_ shape: Tensor<Int32>,
_ filter: Tensor<Scalar>,
_ strides: (Int, Int, Int, Int),
_ padding: Padding
_ padding: Padding,
_ dilations: (Int, Int, Int, Int)
) -> (Tensor<Scalar>, (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
let value = conv2DBackpropInput(x, shape: shape, filter: filter,
strides: strides, padding: padding)
strides: strides, padding: padding, dilations: dilations)
return (value, { v in
(conv2DBackpropFilter(x, input: v, filterSizes: shape, strides: strides, padding: padding),
conv2D(v, filter: filter, strides: strides, padding: padding))
(conv2DBackpropFilter(x, input: v, filterSizes: shape, strides: strides,
padding: padding, dilations: dilations),
conv2D(v, filter: filter, strides: strides, padding: padding, dilations: dilations))
})
}

Expand All @@ -168,13 +174,15 @@ func _vjpConv2DBackpropFilter<Scalar: TensorFlowFloatingPoint>(
_ input: Tensor<Scalar>,
_ filterSizes: Tensor<Int32>,
_ strides: (Int, Int, Int, Int),
_ padding: Padding
_ padding: Padding,
_ dilations: (Int, Int, Int, Int)
) -> (Tensor<Scalar>, (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
let value = conv2DBackpropFilter(x, input: input, filterSizes: filterSizes,
strides: strides, padding: padding)
strides: strides, padding: padding, dilations: dilations)
return (value, { v in
(conv2DBackpropInput(x, shape: filterSizes, filter: v, strides: strides, padding: padding),
conv2D(input, filter: v, strides: strides, padding: padding))
(conv2DBackpropInput(x, shape: filterSizes, filter: v, strides: strides,
padding: padding, dilations: dilations),
conv2D(input, filter: v, strides: strides, padding: padding, dilations: dilations))
})
}

Expand All @@ -183,14 +191,15 @@ func _vjpConv2D<Scalar: TensorFlowFloatingPoint>(
_ x: Tensor<Scalar>,
filter: Tensor<Scalar>,
strides: (Int, Int, Int, Int),
padding: Padding
padding: Padding,
dilations: (Int, Int, Int, Int)
) -> (Tensor<Scalar>, (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
let value = conv2D(x, filter: filter, strides: strides, padding: padding)
let value = conv2D(x, filter: filter, strides: strides, padding: padding, dilations: dilations)
return (value, { v in
(conv2DBackpropInput(v, shape: x.shapeTensor, filter: filter,
strides: strides, padding: padding),
strides: strides, padding: padding, dilations: dilations),
conv2DBackpropFilter(v, input: x, filterSizes: filter.shapeTensor,
strides: strides, padding: padding))
strides: strides, padding: padding, dilations: dilations))
})
}

Expand Down Expand Up @@ -282,11 +291,9 @@ func _vjpConv3D<Scalar: TensorFlowFloatingPoint>(
return (value, { v in
return (
conv3DBackpropInput(v, shape: x.shapeTensor, filter: filter,
strides: strides, padding: padding
),
strides: strides, padding: padding),
conv3DBackpropFilter(v, input: x, filterSizes: filter.shapeTensor,
strides: strides, padding: padding
)
strides: strides, padding: padding)
)
})
}
Expand Down Expand Up @@ -485,22 +492,26 @@ func _vjpAvgPool3D<Scalar: TensorFlowFloatingPoint>(
///   - input: The input.
///   - filter: The convolution filter.
///   - strides: The strides of the sliding filter for each dimension of the input.
///   - padding: The padding for the operation.
///   - dilations: The dilation factor for each dimension of the input.
/// - Precondition: `input` must have rank `4`.
/// - Precondition: `filter` must have rank 4.
@differentiable(wrt: (input, filter), vjp: _vjpConv2D)
public func conv2D<Scalar: TensorFlowFloatingPoint>(
    _ input: Tensor<Scalar>,
    filter: Tensor<Scalar>,
    strides: (Int, Int, Int, Int),
    padding: Padding,
    dilations: (Int, Int, Int, Int) = (1, 1, 1, 1)
) -> Tensor<Scalar> {
    // Default of (1, 1, 1, 1) means no dilation, matching the default on
    // `conv2DBackpropInput` and keeping pre-dilation call sites source
    // compatible. Tuple components are widened to the Int32 arrays the
    // raw TensorFlow op expects.
    return Raw.conv2D(
        input,
        filter: filter,
        strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), Int32(strides.3)],
        padding: padding.raw2,
        explicitPaddings: [],
        dilations: [Int32(dilations.0), Int32(dilations.1), Int32(dilations.2), Int32(dilations.3)])
}

/// Returns a 3-D convolution with the specified input, filter, strides, and padding.
Expand Down
Loading