Skip to content

[ET-VK] Bring back extents_ubo() as texture_limits_ubo() #3217

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions backends/vulkan/runtime/api/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,10 @@ vTensor::vTensor(
// Calculate sizes and strides
sizes_(sizes.begin(), sizes.end()),
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
// Utility Uniform Buffer that can be passed to shaders as arguments
sizes_uniform_(context, api::utils::make_whcn_ivec4(sizes_)),
texture_limits_{{0, 0, 0}},
// Utility Uniform Buffers that can be passed to shaders as arguments
sizes_uniform_(),
texture_limits_uniform_(),
// Construct Tensor storage
storage_(
context,
Expand All @@ -149,6 +151,13 @@ vTensor::vTensor(
gpu_sizes_,
dtype_,
allocate_memory) {
if (storage_type != api::kBuffer) {
texture_limits_.limits = api::utils::ivec3{
api::utils::safe_downcast<int32_t>(storage_.extents_.data[0]),
api::utils::safe_downcast<int32_t>(storage_.extents_.data[1]),
api::utils::safe_downcast<int32_t>(storage_.extents_.data[2])};
}

if (dtype == api::kHalf) {
VK_CHECK_COND(
api::context()->adapter_ptr()->has_16bit_storage(),
Expand Down Expand Up @@ -187,6 +196,22 @@ api::VulkanBuffer& vTensor::buffer(
return storage_.buffer_;
}

/*
 * Returns the binding information for the uniform buffer that holds the
 * tensor sizes in WHCN order. The uniform buffer is created on demand: the
 * first call builds it from the current `sizes_`, and subsequent calls reuse
 * the buffer that already exists.
 */
const api::BufferBindInfo vTensor::sizes_ubo() {
  // Fast path: an earlier call already created the uniform buffer.
  if (sizes_uniform_.buffer()) {
    return api::BufferBindInfo(sizes_uniform_.buffer());
  }

  // First request: build the uniform buffer from the current tensor sizes.
  sizes_uniform_ = api::UniformParamsBuffer(
      storage_.context_, api::utils::make_whcn_ivec4(sizes_));
  return api::BufferBindInfo(sizes_uniform_.buffer());
}

/*
 * Returns the binding information for the uniform buffer that holds the
 * tensor's texture limits. The uniform buffer is created on demand: the first
 * call builds it from the current `texture_limits_`, and subsequent calls
 * reuse the buffer that already exists.
 */
const api::BufferBindInfo vTensor::texture_limits_ubo() {
  // Fast path: an earlier call already created the uniform buffer.
  if (texture_limits_uniform_.buffer()) {
    return api::BufferBindInfo(texture_limits_uniform_.buffer());
  }

  // First request: build the uniform buffer from the current texture limits.
  texture_limits_uniform_ =
      api::UniformParamsBuffer(storage_.context_, texture_limits_);
  return api::BufferBindInfo(texture_limits_uniform_.buffer());
}

VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
switch (storage_type()) {
case api::kBuffer:
Expand Down Expand Up @@ -224,7 +249,25 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
/*
 * Records `new_sizes` as the tensor's sizes and refreshes every piece of
 * metadata derived from them: the GPU sizes, the texture limits (texture
 * storage only), and any uniform buffers that have already been created.
 *
 * The uniform buffers are allocated lazily by sizes_ubo() and
 * texture_limits_ubo(), so each one is only updated here when its underlying
 * buffer exists. An unconditional update of `sizes_uniform_` would touch a
 * buffer that may never have been allocated.
 */
void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
  sizes_ = new_sizes;
  gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());

  if (storage_type() != api::kBuffer) {
    // Calculate the extents of the image texture that would have been required
    // for a tensor of the new sizes.
    api::utils::uvec3 virtual_extents =
        create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
    // Update the texture limits to reflect the new virtual extents.
    texture_limits_.limits = api::utils::ivec3{
        api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
        api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
        api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
  }

  // Only refresh uniform buffers that have actually been allocated; buffers
  // created later are initialized from the up-to-date members at that time.
  if (sizes_uniform_.buffer()) {
    sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
  }
  if (texture_limits_uniform_.buffer()) {
    texture_limits_uniform_.update(texture_limits_);
  }
}

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
Expand All @@ -236,6 +279,8 @@ void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
}

void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
// For texture storage check that the current texture is large enough for the
// new sizes of the tensor.
if (storage_type() != api::kBuffer) {
api::utils::uvec3 virtual_extents =
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
Expand Down
32 changes: 26 additions & 6 deletions backends/vulkan/runtime/api/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ class vTensorStorage final {
};

class vTensor final {
// Host-side value stored in `texture_limits_` and uploaded through
// `texture_limits_uniform_`; the shaders read it as an `ivec3` uniform block
// member.
struct TextureLimits {
// Alignment is required to conform with Vulkan specification; a 3 or 4
// component vector with components of size N must have base alignment of
// 4N. With 32-bit integer components this gives the 16-byte alignment
// requested below.
alignas(16) api::utils::ivec3 limits;
};

public:
explicit vTensor(
api::Context* context,
Expand All @@ -115,11 +122,18 @@ class vTensor final {

std::vector<int64_t> sizes_;
std::vector<int64_t> gpu_sizes_;
TextureLimits texture_limits_;

// A Vulkan uniform buffer containing the tensor sizes in WHCN that can be
// passed into a shader.
// A Vulkan uniform buffer containing the (W, H, C, N) tensor sizes that can
// be passed into a shader.
api::UniformParamsBuffer sizes_uniform_;

// A Vulkan uniform buffer containing the texture limits derived from the
// tensor's current size information that can be passed into a shader. Note
// that the texture limits may be different from the texture's extents if the
// tensor has been resized with `virtual_resize()`.
api::UniformParamsBuffer texture_limits_uniform_;

vTensorStorage storage_;

public:
Expand Down Expand Up @@ -194,11 +208,17 @@ class vTensor final {

/*
* Get the binding information for the uniform buffer object containing the
* tensor sizes to use in a compute shader.
* tensor sizes to use in a compute shader. Note that the GPU buffer will be
* allocated the first time this function is called.
*/
inline const api::BufferBindInfo sizes_ubo() {
return api::BufferBindInfo(sizes_uniform_.buffer());
}
const api::BufferBindInfo sizes_ubo();

/*
* Get the binding information for the uniform buffer object containing the
* texture limits to use in a compute shader. Note that the GPU buffer will be
* allocated the first time this function is called.
*/
const api::BufferBindInfo texture_limits_ubo();

inline size_t numel() const {
return api::utils::multiply_integers(sizes());
Expand Down
8 changes: 3 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in;
layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in;

layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 5) uniform PRECISION restrict InSizes {
Expand All @@ -44,16 +44,14 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams {

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

/*
* Computes a 2D convolution. Each shader invocation calculates the output at
* a single output location.
*/
void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, out_sizes, packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
8 changes: 3 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in;
layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in;

layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 5) uniform PRECISION restrict InSizes {
Expand All @@ -44,16 +44,14 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams {

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

/*
* Computes a depthwise convolution. Each shader invocation calculates the
* output at a single output location.
*/
void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, out_sizes, packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in;
layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in;

layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 5) uniform PRECISION restrict InSizes {
Expand All @@ -44,16 +44,14 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams {

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

/*
* Computes a depthwise convolution. Each shader invocation calculates the
* output at a single output location.
*/
void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, out_sizes, packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
10 changes: 4 additions & 6 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in;
layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in;

layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 5) uniform PRECISION restrict InSizes {
Expand All @@ -44,8 +44,6 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams {

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

/*
* Computes a 2D pointwise convolution of an NxN output tile. Calculating an
* output tile for pointwise convolution is more efficient because the kernel
Expand All @@ -71,7 +69,7 @@ void main() {

// If the top left position is out of bounds, then this invocation will have
// no work to do.
if (pos_out_of_bounds(pos[0], out_sizes, packed_dim)) {
if (any(greaterThanEqual(pos[0], out_limits))) {
return;
}

Expand Down Expand Up @@ -146,7 +144,7 @@ void main() {
}

for (int i = 0; i < ${TILE_SIZE * TILE_SIZE}; ++i) {
if (!pos_out_of_bounds(pos[i], out_sizes, packed_dim)) {
if (all(lessThan(pos[i], out_limits))) {
imageStore(image_out, pos[i], sum[i]);
}
}
Expand Down
8 changes: 4 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in;
layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in;

layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 5) uniform PRECISION restrict InExtents {
layout(set = 0, binding = 5) uniform PRECISION restrict InSizes {
ivec4 in_sizes;
};

Expand Down Expand Up @@ -54,7 +54,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM;
void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, out_sizes, packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
8 changes: 3 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/matmul.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict
layout(set = 0, binding = 1) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} im_mat1;
layout(set = 0, binding = 2) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} im_mat2;

layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 3) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
Expand All @@ -26,12 +26,10 @@ layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int out_packed_dim = C_DIM;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, out_sizes, out_packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
8 changes: 3 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/max_pool2d.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict
layout(set = 0, binding = 1, ${IMAGE_FORMAT["int"]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM]["int"]} image_idx;
layout(set = 0, binding = 2) uniform PRECISION sampler3D image_in;

layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes {
ivec4 out_sizes;
layout(set = 0, binding = 3) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
Expand All @@ -36,12 +36,10 @@ layout(set = 0, binding = 5) uniform PRECISION restrict Params {

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, out_sizes, packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
12 changes: 7 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,24 @@ layout(set = 0, binding = 3) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 4) uniform PRECISION sampler3D weight_in;
layout(set = 0, binding = 5) uniform PRECISION sampler3D bias_in;

layout(set = 0, binding = 6) uniform PRECISION restrict Sizes {
layout(set = 0, binding = 6) uniform PRECISION restrict OutLimits {
ivec3 out_limits;
};

layout(set = 0, binding = 7) uniform PRECISION restrict Sizes {
ivec4 sizes;
};

layout(set = 0, binding = 7) uniform PRECISION restrict Epsilon {
layout(set = 0, binding = 8) uniform PRECISION restrict Epsilon {
float epsilon;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);

if (pos_out_of_bounds(pos, sizes, packed_dim)) {
if (any(greaterThanEqual(pos, out_limits))) {
return;
}

Expand Down
Loading