Skip to content

Commit 92a5c6e

Browse files
committed
[ET-VK][Ops] aten.convolution (Bias=False)
Pull Request resolved: #2887 The final touches to get ET-VK convolution on-par with ATen-VK's convolution. ## Idea In our shaders, we add the bias to our sum. ``` ${VEC4_T[DTYPE]} sum = texelFetch(bias_in, ivec2(pos.z, 0), 0); ``` To keep our shaders as is, we implement having no bias by allocating a buffer of zeros. Then, our shader adds zero to our sum. ## Issue If `Bias=False`, the dummy buffer of zeros is not serialized with the graph. The bias ValueRef is deserialized in the runtime as `TypeTag::NONE`, not `TypeTag::TENSORREF`. ## Solution If `TypeTag::NONE` is given, (1) create the `vTensor` using the `out_channels` value from the weights, (2) allocate a StorageBuffer of that size, and (3) `memset` its data to zero. Failure to do (3) will result in undefined behavior. ghstack-source-id: 221926167 @exported-using-ghexport Differential Revision: [D55814589](https://our.internmc.facebook.com/intern/diff/D55814589/)
1 parent 99c4f4e commit 92a5c6e

File tree

6 files changed

+69
-11
lines changed

6 files changed

+69
-11
lines changed

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,32 @@ PrepackNode::PrepackNode(
4343
graph.update_descriptor_counts(noop_shader_, /*execute = */ false);
4444
}
4545

46-
void PrepackNode::encode(ComputeGraph* graph) {
47-
api::Context* const context = graph->context();
48-
49-
TensorRef& tref = graph->get_val(tref_).toTensorRef();
46+
// Create and fill the CPU-side staging buffer whose contents will later be
// copied into the packed GPU tensor by the prepack shader.
//
// Two cases:
//  1. tref_ is None (e.g. convolution with bias=False): size the buffer from
//     the packed vTensor's own metadata and zero-fill it. Skipping the
//     zero-fill would leave the staging memory uninitialized — undefined
//     behavior when the shader reads it.
//  2. tref_ is a TensorRef: size the buffer from the TensorRef and copy its
//     CPU data into the staging buffer.
api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
  vTensor& packed = graph->get_val(packed_).toTensor();

  // If no TensorRef is provided, create a staging buffer of zeros according to
  // the vTensor metadata.
  if (graph->get_val(tref_).isNone()) {
    size_t numel = api::utils::multiply_integers(packed.sizes());
    api::StorageBuffer staging(graph->context(), packed.dtype(), numel);
    size_t nbytes = numel * api::element_size(packed.dtype());
    set_staging_zeros(staging, nbytes);
    return staging;
  }

  TensorRef& tref = graph->get_val(tref_).toTensorRef();
  size_t numel = api::utils::multiply_integers(tref.sizes);
  api::StorageBuffer staging(graph->context(), tref.dtype, numel);
  size_t nbytes = numel * api::element_size(tref.dtype);
  copy_ptr_to_staging(tref.data, staging, nbytes);
  return staging;
}
66+
67+
void PrepackNode::encode(ComputeGraph* graph) {
68+
api::Context* const context = graph->context();
69+
70+
vTensor& packed = graph->get_val(packed_).toTensor();
71+
api::StorageBuffer staging = create_staging_buffer(graph);
5672

5773
std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
5874

@@ -76,7 +92,7 @@ void PrepackNode::encode(ComputeGraph* graph) {
7692
}
7793

7894
// Submit a compute shader that performs a no-op with the packed tensor in
79-
// order to trigger a image layout transition from GENERAL to
95+
// order to trigger an image layout transition from GENERAL to
8096
// READ_ONLY_OPTIMAL. This ensures that future uses of the tensor will be
8197
// bound with the correct image layout.
8298
{

backends/vulkan/runtime/graph/ops/PrepackNode.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class PrepackNode final {
4848
const ValueRef packed_;
4949
// TODO(T180906457): allow re-computing param buffers.
5050
std::vector<std::shared_ptr<api::UniformParamsBuffer>> params_;
51+
52+
private:
53+
api::StorageBuffer create_staging_buffer(ComputeGraph* graph);
5154
};
5255

5356
} // namespace vkcompute

backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,16 @@ void resize_conv2d_node(
5252
out.virtual_resize(new_out_sizes);
5353
}
5454

55-
ValueRef prepack_biases(ComputeGraph& graph, const ValueRef vref) {
56-
if (graph.get_val(vref).isNone()) {
57-
VK_THROW("aten.convolution.default: Null bias is not supported yet!");
58-
}
55+
ValueRef prepack_biases(
56+
ComputeGraph& graph,
57+
const ValueRef vref,
58+
const ValueRef weight,
59+
const bool transposed) {
60+
TensorRef& tref = graph.get_val(weight).toTensorRef();
61+
const int64_t out_channels = transposed ? tref.sizes.at(1) : tref.sizes.at(0);
5962

60-
ValueRef v = graph.add_tensor_like(vref, api::kTexture2D, api::kWidthPacked);
63+
ValueRef v = graph.add_tensor(
64+
{out_channels}, tref.dtype, api::kTexture2D, api::kWidthPacked);
6165
vTensor& t = graph.get_val(v).toTensor();
6266

6367
api::ShaderInfo shader = get_nchw_to_image_shader(t);
@@ -296,7 +300,7 @@ void add_conv2d_node(
296300

297301
ValueRef arg_in = prepack_if_tensor_ref(graph, in);
298302
ValueRef arg_weight = prepack_weights(graph, weight, method);
299-
ValueRef arg_bias = prepack_biases(graph, bias);
303+
ValueRef arg_bias = prepack_biases(graph, bias, weight, transposed_val);
300304

301305
vTensor& t_in = graph.get_val(arg_in).toTensor();
302306
vTensor& t_out = graph.get_val(out).toTensor();

backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,12 @@ void copy_staging_to_ptr(
8989
memcpy_from_mapping(mapping, dst, nbytes, staging.dtype());
9090
}
9191

92+
// Zero-fill a staging buffer. Used to back an absent (None) tensor — e.g.
// bias=False in convolution — so that shaders can unconditionally add the
// "bias" and effectively add zero.
//
// The ENTIRE buffer (staging.nbytes()) is zeroed rather than only the
// caller-requested `nbytes`: the underlying allocation may be padded past the
// logical data size, and leaving that tail uninitialized risks undefined
// behavior if the GPU reads it.
void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes) {
  // NOTE(review): `nbytes` is intentionally unused (full buffer is zeroed,
  // see above); kept for signature symmetry with copy_ptr_to_staging.
  (void)nbytes;
  api::MemoryMap mapping(staging.buffer(), api::MemoryAccessType::WRITE);
  uint8_t* data_ptr = mapping.template data<uint8_t>();
  memset(data_ptr, 0, staging.nbytes());
}
97+
9298
api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) {
9399
if (v_dst.is_quantized()) {
94100
VK_THROW("Quantized Tensors are currently not supported!");

backends/vulkan/runtime/graph/ops/utils/StagingUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ void copy_staging_to_ptr(
2525
void* dst,
2626
const size_t nbytes);
2727

28+
void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes);
29+
2830
//
2931
// Functions to get shaders
3032
//

backends/vulkan/test/test_vulkan_delegate.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,3 +601,30 @@ def forward(self, x):
601601
sample_inputs,
602602
memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
603603
)
604+
605+
def test_vulkan_backend_conv2d_bias_false(self):
    """Lower a Conv2d module with bias=False to the Vulkan backend and
    compare its output against eager mode, exercising the runtime path
    that synthesizes a zero bias buffer when none is serialized."""

    class Conv2dModule(torch.nn.Module):
        def __init__(self):
            super().__init__()
            # bias=False is the point of this test: the delegate must
            # handle a TypeTag::NONE bias value at prepack time.
            self.conv = torch.nn.Conv2d(
                in_channels=6,
                out_channels=8,
                kernel_size=(3, 3),
                padding=(2, 3),
                stride=(1, 2),
                dilation=1,
                groups=1,
                bias=False,
            )

        def forward(self, x):
            return self.conv(x)

    conv2d_module = Conv2dModule()
    # Input sized to exercise asymmetric padding/stride: (N, C_in, H, W).
    sample_inputs = (torch.randn(size=(1, 6, 40, 50), dtype=torch.float32),)

    self.lower_module_and_test_output(
        conv2d_module,
        sample_inputs,
        memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
    )

0 commit comments

Comments
 (0)