Skip to content

[ET-VK][10/n] copy node, aten.repeat #3299

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/api/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ class vTensor final {
*/
const api::BufferBindInfo texture_limits_ubo();

/*
 * Returns, by value, the `limits` field of this tensor's texture_limits_
 * member.
 */
inline const api::utils::ivec3 texture_limits() const {
  const auto& limits = texture_limits_.limits;
  return limits;
}

/*
 * Returns the result of api::utils::multiply_integers applied to sizes().
 */
inline size_t numel() const {
  const auto& dims = sizes();
  return api::utils::multiply_integers(dims);
}
Expand Down
37 changes: 37 additions & 0 deletions backends/vulkan/runtime/api/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,23 @@ inline std::ostream& operator<<(std::ostream& os, const uvec3& v) {
return os;
}

/*
 * Streams the three components of an ivec3 as "(x, y, z)".
 */
inline std::ostream& operator<<(std::ostream& os, const ivec3& v) {
  os << "(";
  os << v.data[0u] << ", ";
  os << v.data[1u] << ", ";
  os << v.data[2u] << ")";
  return os;
}

/*
 * Streams the four components of a uvec4 as "(x, y, z, w)".
 */
inline std::ostream& operator<<(std::ostream& os, const uvec4& v) {
  os << "(";
  os << v.data[0u] << ", ";
  os << v.data[1u] << ", ";
  os << v.data[2u] << ", ";
  os << v.data[3u] << ")";
  return os;
}

/*
 * Streams the four components of an ivec4 as "(x, y, z, w)".
 */
inline std::ostream& operator<<(std::ostream& os, const ivec4& v) {
  return os << "(" << v.data[0u] << ", " << v.data[1u] << ", " << v.data[2u]
            << ", " << v.data[3u] << ")";
}

//
// std::vector<T> Handling
//
Expand Down Expand Up @@ -298,6 +309,25 @@ inline ivec2 make_ivec2(
}
}

/*
 * Builds an ivec3 from a vector holding exactly three int64_t values. Each
 * value goes through safe_downcast<int32_t>. When `reverse` is true the
 * elements are taken in the order [2], [1], [0]; otherwise [0], [1], [2].
 * The size requirement is enforced with VK_CHECK_COND.
 */
inline ivec3 make_ivec3(
    const std::vector<int64_t>& ints,
    bool reverse = false) {
  VK_CHECK_COND(ints.size() == 3);

  // Only the outer two positions swap under reversal; the middle is fixed.
  const int first = reverse ? 2 : 0;
  const int last = reverse ? 0 : 2;

  return {
      safe_downcast<int32_t>(ints[first]),
      safe_downcast<int32_t>(ints[1]),
      safe_downcast<int32_t>(ints[last]),
  };
}

inline ivec4 make_ivec4(
const std::vector<int64_t>& ints,
bool reverse = false) {
Expand Down Expand Up @@ -338,6 +368,13 @@ inline ivec3 make_ivec3(uvec3 ints) {
safe_downcast<int32_t>(ints.data[2u])};
}

/*
 * Converts an ivec3 to a uvec3, passing each component through
 * safe_downcast<uint32_t>.
 */
inline uvec3 make_uvec3(ivec3 ints) {
  const auto x = safe_downcast<uint32_t>(ints.data[0u]);
  const auto y = safe_downcast<uint32_t>(ints.data[1u]);
  const auto z = safe_downcast<uint32_t>(ints.data[2u]);
  return {x, y, z};
}

/*
* Given a vector of up to 4 uint64_t representing the sizes of a tensor,
* constructs a uvec4 containing those elements in reverse order.
Expand Down
8 changes: 8 additions & 0 deletions backends/vulkan/runtime/graph/Logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec4& v) {
return api::utils::operator<<(os, v);
}

/*
 * Forwards to the api::utils stream operator for ivec3 so that it can be
 * used from this scope.
 */
inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec3& v) {
  api::utils::operator<<(os, v);
  return os;
}

/*
 * Forwards to the api::utils stream operator for ivec4 so that it can be
 * used from this scope.
 */
inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec4& v) {
  std::ostream& out = api::utils::operator<<(os, v);
  return out;
}

template <typename T>
inline std::ostream& operator<<(std::ostream& os, const std::optional<T>& opt) {
os << "[";
Expand Down
54 changes: 54 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

#define PRECISION ${PRECISION}

#define VEC4_T ${texel_type(DTYPE)}

layout(std430) buffer;

#include "indexing_utils.h"

// Binding 0: destination image, declared write-only.
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
// Binding 1: source texture, read with texelFetch in main().
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;

// Binding 2: limits of the output texture. main() in this shader does not
// read this value.
layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

// Binding 3: limits of the input texture. main() in this shader does not
// read this value.
layout(set = 0, binding = 3) uniform PRECISION restrict InLimits {
ivec3 in_limits;
};



// Binding 4: copy parameters.
//   range      - extent of the region to copy; invocations at or beyond it
//                in any component exit early.
//   src_offset - added to the invocation position to get the read position.
//   dst_offset - added to the invocation position to get the write position.
// Each ivec3 is followed by an unused int; the same field order appears in
// the host-side parameter struct in Copy.cpp. NOTE(review): presumably the
// padding keeps every ivec3 on a 16-byte boundary — confirm against the
// uniform block layout rules in use.
layout(set = 0, binding = 4) uniform PRECISION restrict CopyArgs {
ivec3 range;
int unused0;
ivec3 src_offset;
int unused1;
ivec3 dst_offset;
int unused2;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Copies one texel per invocation: reads image_in at (pos + src_offset) and
// writes it to image_out at (pos + dst_offset). Invocations whose position
// is outside `range` in any component do nothing.
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // Skip invocations that fall outside the requested copy region.
  if (any(greaterThanEqual(pos, range))) {
    return;
  }

  const ivec3 in_pos = pos + src_offset;
  const ivec3 out_pos = pos + dst_offset;

  const vec4 texel = texelFetch(image_in, in_pos, 0);
  imageStore(image_out, out_pos, texel);
}
10 changes: 10 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/copy_offset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Variant specification for the copy_offset shader template.
# NOTE(review): presumably consumed by the shader code generator — confirm.
copy_offset:
parameter_names_with_default_values:
# Default template parameters.
DTYPE: float
NDIM: 3
generate_variant_forall:
# DTYPE values for which variants are requested.
DTYPE:
- VALUE: half
- VALUE: float
shader_variants:
- NAME: copy_offset
58 changes: 58 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/repeat_channel.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

#define PRECISION ${PRECISION}

#define VEC4_T ${texel_type(DTYPE)}

layout(std430) buffer;

#include "indexing_utils.h"

// Binding 0: destination image, declared write-only.
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
// Binding 1: source texture, read with texelFetch in main().
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;

// Binding 2: sizes of the output and input tensors. main() compares tensor
// indices against out_sizes and wraps the .z component by in_sizes.z.
layout(set = 0, binding = 2) uniform PRECISION restrict RepeatArgs {
// With input_size (n, c_i, h, w) and repeat r
// out_size == (n, c_i * r, h, w)
ivec4 out_sizes;
ivec4 in_sizes;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Specialization constant 3: the packed dimension, defaulting to C_DIM.
// NOTE(review): C_DIM is presumably defined in indexing_utils.h — confirm.
layout(constant_id = 3) const int packed_dim = C_DIM;


// Produces one output texel per invocation. For each of the texel's four
// components, the .z component of the tensor index is advanced by the
// component number and wrapped modulo in_sizes.z. The matching input element
// is then fetched and stored in that component.
// NOTE(review): to_tensor_idx / to_texture_elem_pos come from
// indexing_utils.h; judging by the variable names they presumably map between
// texel positions and (w, h, c, n) tensor indices — confirm.
void main() {
  const ivec3 out_pos = ivec3(gl_GlobalInvocationID);
  const ivec4 out_idx = to_tensor_idx(out_pos, out_sizes, packed_dim);

  // Nothing to write when the index lies outside the output tensor.
  if (any(greaterThanEqual(out_idx, out_sizes))) {
    return;
  }

  // Gather the texel one component at a time. This is not the most efficient
  // approach: the same input texel is likely fetched several times.
  VEC4_T texel;
  for (int lane = 0; lane < 4; lane++) {
    ivec4 src_idx = out_idx;
    src_idx.z = (out_idx.z + lane) % in_sizes.z;

    const ivec4 src_elem = to_texture_elem_pos(src_idx, in_sizes, packed_dim);
    const VEC4_T src_texel = VEC4_T(texelFetch(image_in, src_elem.xyz, 0));

    texel[lane] = src_texel[src_elem.w];
  }

  imageStore(image_out, out_pos, texel);
}
10 changes: 10 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/repeat_channel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Variant specification for the repeat_channel shader template.
# NOTE(review): presumably consumed by the shader code generator — confirm.
repeat_channel:
parameter_names_with_default_values:
# Default template parameters.
DTYPE: float
NDIM: 3
generate_variant_forall:
# DTYPE values for which variants are requested.
DTYPE:
- VALUE: half
- VALUE: float
shader_variants:
- NAME: repeat_channel
70 changes: 70 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/Copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

namespace vkcompute {

/*
 * Appends an ExecuteNode to `graph` that dispatches the "copy_offset" shader
 * (with a dtype suffix derived from the output tensor).
 *
 * The shader copies one texel per invocation: for every position `pos`
 * inside `range`, the texel at `pos + src_offset` in `in` is written to
 * `pos + dst_offset` in `out`.
 *
 * graph       - compute graph that owns both tensors and receives the node.
 * in          - source tensor; must use the channels-packed memory layout.
 * range       - extent of the region to copy; also used as the global
 *               work group size, so it must convert to a uvec3 via
 *               make_uvec3.
 * src_offset  - offset added to each position when reading from `in`.
 * dst_offset  - offset added to each position when writing to `out`.
 * out         - destination tensor; must use the channels-packed memory
 *               layout.
 */
void add_copy_offset_node(
    ComputeGraph& graph,
    const ValueRef in,
    const api::utils::ivec3& range,
    const api::utils::ivec3& src_offset,
    const api::utils::ivec3& dst_offset,
    const ValueRef out) {
  vTensorPtr t_in = graph.get_tensor(in);
  vTensorPtr t_out = graph.get_tensor(out);

  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
  VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));

  std::string kernel_name = "copy_offset";
  kernel_name.reserve(kShaderNameReserve);
  add_dtype_suffix(kernel_name, *t_out);

  // One invocation per texel of the copied region.
  api::utils::uvec3 global_size = api::utils::make_uvec3(range);
  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);

  // Field order matches the CopyArgs uniform block in copy_offset.glsl,
  // including the unused int after each ivec3.
  const struct Block final {
    api::utils::ivec3 range;
    int32_t unused0;
    api::utils::ivec3 src_offset;
    int32_t unused1;
    api::utils::ivec3 dst_offset;
    int32_t unused2;
  } offset_params{
      range,
      0,
      src_offset,
      0,
      dst_offset,
      0,
  };

  graph.execute_nodes().emplace_back(new ExecuteNode(
      graph,
      VK_KERNEL_FROM_STR(kernel_name),
      global_size,
      local_size,
      // Inputs and Outputs
      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
      // Parameter buffers
      {t_out->texture_limits_ubo(),
       t_in->texture_limits_ubo(),
       graph.create_params_buffer(offset_params)},
      // Specialization Constants
      {}));
}

} // namespace vkcompute
25 changes: 25 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/Copy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/runtime/api/api.h>

namespace vkcompute {

/*
 * Adds a node to `graph` that copies a region of texels from `in` to `out`
 * using the copy_offset shader. For each position `pos` inside `range`, the
 * texel at `pos + src_offset` in `in` is written to `pos + dst_offset` in
 * `out`. The implementation in Copy.cpp requires both tensors to use the
 * channels-packed memory layout.
 */
void add_copy_offset_node(
ComputeGraph& graph,
const ValueRef in,
const api::utils::ivec3& range,
const api::utils::ivec3& src_offset,
const api::utils::ivec3& dst_offset,
const ValueRef out);

} // namespace vkcompute
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/Permute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ using api::utils::ivec3;
using api::utils::uvec2;
using api::utils::uvec4;

namespace {

void check_args(
const vTensor& in,
const std::vector<int64_t>& permute_dims,
Expand All @@ -39,6 +41,8 @@ void check_args(
"Output tensor dim size must match argument");
}

} // namespace

void add_permute_node(
ComputeGraph& graph,
ValueRef in,
Expand Down
Loading