
Commit 52ff39b

SS-JIA authored and facebook-github-bot committed
Add support for buffer storage tensors (#3684)
Summary:
Pull Request resolved: #3684

## Context

Add support for tensors that use buffer storage, in preparation for quantization support. The initial versions of quantized operators will target buffer-based tensors, because the primary use case is LLMs, which may contain tensors whose sizes exceed the texture limits.

Differential Revision: D57577019
1 parent d79ba63 commit 52ff39b

30 files changed: +1161 -425 lines changed
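The context above is easier to see with a concrete size check. Below is a minimal, standalone sketch (not code from this commit): it assumes a hypothetical [32000, 4096] embedding weight stored width-packed and a typical maxImageDimension3D value of 2048; the real limit would come from VkPhysicalDeviceLimits at runtime.

// Illustrative only: why a large LLM weight may not fit in an image texture
// and would fall back to buffer storage. All sizes and the limit value below
// are hypothetical.
#include <cstdint>
#include <iostream>

int main() {
  const uint32_t H = 32000;            // e.g. vocab_size
  const uint32_t W = 4096 / 4;         // e.g. hidden_dim, in vec4 texels (width-packed)
  const uint32_t max_image_dim = 2048; // typical maxImageDimension3D on mobile GPUs

  const bool fits_in_texture = (W <= max_image_dim) && (H <= max_image_dim);
  std::cout << (fits_in_texture
                    ? "texture storage is viable\n"
                    : "tensor exceeds texture limits; use buffer storage\n");
  return 0;
}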

backends/vulkan/runtime/api/Context.cpp

Lines changed: 5 additions & 0 deletions
@@ -241,5 +241,10 @@ ParamsBindList::ParamsBindList(
   std::copy(init_list.begin(), init_list.end(), bind_infos.begin());
 }
 
+void ParamsBindList::append(const ParamsBindList& other) {
+  bind_infos.insert(
+      bind_infos.end(), other.bind_infos.begin(), other.bind_infos.end());
+}
+
 } // namespace api
 } // namespace vkcompute

backends/vulkan/runtime/api/Context.h

Lines changed: 2 additions & 0 deletions
@@ -270,6 +270,8 @@ struct ParamsBindList final {
   std::vector<BufferBindInfo> bind_infos;
 
   ParamsBindList(std::initializer_list<const BufferBindInfo> init_list);
+
+  void append(const ParamsBindList& other);
 };
 
 class StorageBuffer final {

backends/vulkan/runtime/api/Tensor.cpp

Lines changed: 125 additions & 136 deletions
@@ -11,117 +11,87 @@
 
 namespace vkcompute {
 
-namespace {
-
-/*
- * When stored on the GPU, one dimension will be aligned to the next multiple of
- * 4 in order to take advantage of vec4 data types. The dimension that is
- * packed is denoted by the GPUMemoryLayout. This function adjusts one of
- * the dimensions based on the desired memory format and storage type and
- * returns a sizes array describing the dimensions of the memory used to store
- * the tensor data on the GPU.
- */
-std::vector<int64_t> calc_gpu_sizes(
+std::vector<int64_t> calculate_strides(
     const std::vector<int64_t>& sizes,
     const api::GPUMemoryLayout memory_layout,
-    const api::StorageType storage_type) {
-  std::vector<int64_t> gpu_sizes;
-  if (storage_type == api::kBuffer) {
-    gpu_sizes.resize(sizes.size());
-    for (size_t i = 0; i < sizes.size(); i++) {
-      gpu_sizes.at(i) = sizes.at(i);
+    const bool texel_strides) {
+  const int64_t dim_offset = api::to_dim_offset<int64_t>(memory_layout);
+  const int64_t last_dim = sizes.size() - dim_offset;
+  VK_CHECK_COND(last_dim >= 0);
+
+  size_t ndim = sizes.size();
+  std::vector<int64_t> strides(ndim);
+
+  const int64_t last_dim_size = texel_strides
+      ? api::utils::div_up(sizes.at(last_dim), INT64_C(4))
+      : sizes.at(last_dim);
+
+  for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) {
+    strides.at(stride_d) = 1;
+    if (stride_d == last_dim) {
+      continue;
    }
-  }
-  // For texture storage, tensors are typically stored using 3D image textures.
-  // Batches are stacked along the depth dimension. To represent the physical
-  // 3 dimensionality of the image texture (with concatenated batches) GPU sizes
-  // will be fixed to 4 dimensions when using texture storage.
-  else {
-    VK_CHECK_COND(
-        sizes.size() >= 0 && sizes.size() <= 4,
-        "Texture storage only valid for 0 <= ndim <= 4, received: ",
-        sizes.size());
-
-    gpu_sizes.resize(4);
-    gpu_sizes.at(0) = api::utils::val_at(-4, sizes);
-    gpu_sizes.at(1) = api::utils::val_at(-3, sizes);
-    gpu_sizes.at(2) = api::utils::val_at(-2, sizes);
-    gpu_sizes.at(3) = api::utils::val_at(-1, sizes);
-  }
-
-  size_t ndim = gpu_sizes.size();
-  switch (memory_layout) {
-    case api::kWidthPacked:
-      if (ndim >= 1) {
-        gpu_sizes.at(ndim - 1) =
-            api::utils::align_up(api::utils::val_at(-1, sizes), INT64_C(4));
+    strides.at(stride_d) = last_dim_size;
+    for (int size_d = ndim - 1; size_d > stride_d; size_d--) {
+      if (size_d != last_dim) {
+        strides.at(stride_d) *= sizes.at(size_d);
      }
-      break;
+    }
+  }
+  return strides;
+}
 
-    case api::kHeightPacked:
-      if (ndim >= 2) {
-        gpu_sizes.at(ndim - 2) =
-            api::utils::align_up(api::utils::val_at(-2, sizes), INT64_C(4));
-      }
-      break;
+std::vector<int64_t> calculate_padded_sizes(
+    const std::vector<int64_t>& sizes,
+    const api::GPUMemoryLayout memory_layout) {
+  int64_t ndim = sizes.size();
+  if (ndim == 0) {
+    ndim = 1;
+  }
 
-    case api::kChannelsPacked:
-      if (ndim >= 3) {
-        gpu_sizes.at(ndim - 3) =
-            api::utils::align_up(api::utils::val_at(-3, sizes), INT64_C(4));
-      }
-      break;
+  // Tensor sizes will be unsqueezed up to the next multiple of 4
+  const int64_t ndim_up4 = api::utils::align_up(ndim, INT64_C(4));
+  std::vector<int64_t> gpu_sizes(ndim_up4);
+  for (int64_t i = 0; i < ndim_up4; ++i) {
+    gpu_sizes.at(i) = api::utils::val_at(i - ndim_up4, sizes);
  }
 
+  // Pad the packed dim to the next multiple of 4.
+  const int64_t dim_offset = api::to_dim_offset<int64_t>(memory_layout);
+  const int64_t padded_dim_size = api::utils::val_at(-dim_offset, sizes);
+  gpu_sizes.at(ndim_up4 - dim_offset) =
+      api::utils::align_up(padded_dim_size, INT64_C(4));
+
   return gpu_sizes;
 }
 
-/*
- * Creates a uvec3 denoting the extents of the image texture that will be
- * created to store a tensor of a given size.
- */
-api::utils::uvec3 create_image_extents(
+api::utils::uvec3 calculate_texture_limits(
     const std::vector<int64_t>& gpu_sizes,
-    const api::StorageType storage_type,
     const api::GPUMemoryLayout memory_layout) {
-  size_t ndim = gpu_sizes.size();
+  VK_CHECK_COND(gpu_sizes.size() == 4);
 
-  if (storage_type == api::kBuffer) {
-    // image extents do not apply to buffer storage
-    return {0u, 0u, 0u};
-  } else {
-    VK_CHECK_COND(
-        ndim >= 1 && ndim <= 4,
-        "Texture storage only valid for 1 <= ndim <= 4!");
-
-    using namespace api::utils;
-    uint32_t width = safe_downcast<uint32_t>(val_at(-1, gpu_sizes));
-    uint32_t height = safe_downcast<uint32_t>(val_at(-2, gpu_sizes));
-    uint32_t channels = safe_downcast<uint32_t>(val_at(-3, gpu_sizes));
-    uint32_t batch = safe_downcast<uint32_t>(val_at(-4, gpu_sizes));
-
-    switch (memory_layout) {
-      case api::kWidthPacked:
-        VK_CHECK_COND(width % 4 == 0, "Width must be divisible by 4!");
-        width /= 4;
-        break;
-      case api::kHeightPacked:
-        VK_CHECK_COND(height % 4 == 0, "Height must be divisible by 4!");
-        height /= 4;
-        break;
-      case api::kChannelsPacked:
-        VK_CHECK_COND(channels % 4 == 0, "Channels must be divisible by 4!");
-        channels /= 4;
-        break;
-      default:
-        VK_THROW("Invalid memory format used!");
-    }
+  uint32_t N = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(0));
+  uint32_t C = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(1));
+  uint32_t H = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(2));
+  uint32_t W = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(3));
 
-    return {width, height, batch * channels};
+  switch (memory_layout) {
+    case api::kWidthPacked:
+      VK_CHECK_COND(W % 4 == 0);
+      W /= 4;
+      break;
+    case api::kHeightPacked:
+      VK_CHECK_COND(H % 4 == 0);
+      H /= 4;
+      break;
+    case api::kChannelsPacked:
+      VK_CHECK_COND(C % 4 == 0);
+      C /= 4;
+      break;
  }
-}
 
-} // namespace
+  return {W, H, C * N};
+}
 
 //
 // vTensor
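To make the new helpers concrete, here is a standalone sketch of the same arithmetic for a hypothetical {2, 3, 5} tensor with a width-packed layout. It re-implements the logic with plain C++ rather than the api::utils helpers, and assumes dim_offset == 1 for kWidthPacked and texel_strides == true; treat it as an illustration of calculate_padded_sizes and calculate_strides, not a drop-in replacement.

// Standalone illustration (assumptions noted above), not library code.
#include <cstdint>
#include <iostream>
#include <vector>

int64_t align_up(int64_t v, int64_t m) { return (v + m - 1) / m * m; }
int64_t div_up(int64_t v, int64_t m) { return (v + m - 1) / m; }

int main() {
  const std::vector<int64_t> sizes = {2, 3, 5}; // (C, H, W)
  const int64_t dim_offset = 1; // width-packed: the innermost dim is packed

  // calculate_padded_sizes: unsqueeze to 4 dims, then pad the packed dim to a
  // multiple of 4. {2, 3, 5} -> {1, 2, 3, 5} -> {1, 2, 3, 8}
  std::vector<int64_t> gpu_sizes(4, 1);
  for (size_t i = 0; i < sizes.size(); ++i) {
    gpu_sizes[4 - sizes.size() + i] = sizes[i];
  }
  gpu_sizes[4 - dim_offset] = align_up(gpu_sizes[4 - dim_offset], 4);

  // calculate_strides with texel strides: the packed dim counts vec4 texels,
  // so the innermost stride is 1 texel and the 8 padded elements become 2
  // texels per row.
  std::vector<int64_t> strides(4, 1);
  int64_t running = div_up(gpu_sizes[3], 4); // 2 texels per row
  for (int d = 2; d >= 0; --d) {
    strides[d] = running;
    running *= gpu_sizes[d];
  }
  // strides is now {12, 6, 2, 1} in texel units.

  for (int64_t s : strides) {
    std::cout << s << " ";
  }
  std::cout << "\n";
  return 0;
}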
@@ -138,12 +108,14 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate sizes and strides
       sizes_(sizes.begin(), sizes.end()),
-      gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
+      gpu_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
       texture_limits_{{0, 0, 0}},
       // Utility Uniform Buffers that can be passed to shaders as arguments
       sizes_uniform_(),
       texture_limits_uniform_(),
-      packed_dim_meta_(),
+      strides_uniform_(),
+      ntexels_uniform_(),
+      buffer_meta_uniform_(),
       // Construct Tensor storage
       storage_(
           context,
@@ -197,6 +169,16 @@ api::VulkanBuffer& vTensor::buffer(
   return storage_.buffer_;
 }
 
+vTensor::BufferMetadata vTensor::make_buffer_metadata() {
+  auto strides = calculate_strides(gpu_sizes_, memory_layout_);
+  return BufferMetadata{
+      api::utils::make_whcn_ivec4(sizes_),
+      api::utils::make_whcn_ivec4(
+          calculate_strides(gpu_sizes_, memory_layout_)),
+      texel_numel(),
+      packed_dim_ntexels()};
+}
+
 const api::BufferBindInfo vTensor::sizes_ubo() {
   if (!sizes_uniform_.buffer()) {
     sizes_uniform_ = api::UniformParamsBuffer(
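The BufferMetadata struct populated above is declared in Tensor.h, which is not shown in this excerpt. As a rough, hypothetical reconstruction of what it presumably contains (field names and types below are guesses that mirror the four values passed in), it bundles the WHCN sizes and texel strides with the texel counts so they can be uploaded as a single uniform buffer:

// Hypothetical sketch for illustration only; the actual declaration lives in
// Tensor.h and may use different names and types.
#include <cstdint>

struct ivec4_like {
  int32_t data[4];
};

struct BufferMetadataSketch {
  ivec4_like sizes;            // tensor sizes in WHCN order
  ivec4_like strides;          // texel strides in WHCN order
  int32_t ntexels;             // total number of texels in the buffer
  int32_t packed_dim_ntexels;  // number of texels along the packed dim
};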
@@ -213,28 +195,30 @@ const api::BufferBindInfo vTensor::texture_limits_ubo() {
   return api::BufferBindInfo(texture_limits_uniform_.buffer());
 }
 
-vTensor::PackedDimMeta vTensor::make_packed_dim_metadata() const {
-  int64_t packed_dim = gpu_memory_layout_int();
-  int32_t dim_size = api::utils::val_at(-(packed_dim + 1), sizes_);
-  int32_t dim_size_padded = api::utils::val_at(-(packed_dim + 1), gpu_sizes_);
-  int32_t dim_texel_len =
-      api::utils::safe_downcast<int32_t>(extents().data[packed_dim]);
-  int32_t padding = dim_size_padded - dim_size;
-
-  return {
-      dim_size,
-      dim_size_padded,
-      dim_texel_len,
-      padding,
-  };
+const api::BufferBindInfo vTensor::strides_ubo() {
+  if (!strides_uniform_.buffer()) {
+    strides_uniform_ = api::UniformParamsBuffer(
+        storage_.context_,
+        api::utils::make_whcn_ivec4(
+            calculate_strides(gpu_sizes_, memory_layout_)));
+  }
+  return api::BufferBindInfo(strides_uniform_.buffer());
 }
 
-const api::BufferBindInfo vTensor::packed_dim_meta_ubo() {
-  if (!packed_dim_meta_.buffer()) {
-    packed_dim_meta_ =
-        api::UniformParamsBuffer(storage_.context_, make_packed_dim_metadata());
+const api::BufferBindInfo vTensor::ntexels_ubo() {
+  if (!ntexels_uniform_.buffer()) {
+    ntexels_uniform_ =
+        api::UniformParamsBuffer(storage_.context_, texel_numel());
   }
-  return api::BufferBindInfo(packed_dim_meta_.buffer());
+  return api::BufferBindInfo(ntexels_uniform_.buffer());
+}
+
+const api::BufferBindInfo vTensor::buffer_meta_ubo() {
+  if (!buffer_meta_uniform_.buffer()) {
+    sizes_uniform_ =
+        api::UniformParamsBuffer(storage_.context_, make_buffer_metadata());
+  }
+  return api::BufferBindInfo(sizes_uniform_.buffer());
 }
 
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
@@ -273,45 +257,51 @@ void vTensor::bind_allocation(const api::Allocation& allocation) {
 
 void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
-  gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
+  gpu_sizes_ = calculate_padded_sizes(sizes_, memory_layout_);
 
-  if (storage_type() != api::kBuffer) {
-    // Calculate the extents of the image texture that would have been required
-    // for a tensor of the new sizes.
-    api::utils::uvec3 virtual_extents =
-        create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
-    // Update the texture limits to reflect the new virtual extents.
-    texture_limits_.limits = api::utils::ivec3{
-        api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
-        api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
-        api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
-  }
+  // Calculate the extents of the image texture that would have been required
+  // for a tensor of the new sizes.
+  api::utils::uvec3 virtual_extents =
+      calculate_texture_limits(gpu_sizes_, memory_layout_);
+
+  // Update the texture limits to reflect the new virtual extents.
+  texture_limits_.limits = api::utils::ivec3{
+      api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
+      api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
+      api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
 
   if (sizes_uniform_.buffer()) {
     sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
   }
   if (texture_limits_uniform_.buffer()) {
     texture_limits_uniform_.update(texture_limits_);
   }
-  if (packed_dim_meta_.buffer()) {
-    packed_dim_meta_.update(make_packed_dim_metadata());
+  if (strides_uniform_.buffer()) {
+    strides_uniform_.update(api::utils::make_whcn_ivec4(
+        calculate_strides(gpu_sizes_, memory_layout_)));
+  }
+  if (ntexels_uniform_.buffer()) {
+    ntexels_uniform_.update(texel_numel());
+  }
+  if (buffer_meta_uniform_.buffer()) {
+    buffer_meta_uniform_.update(make_buffer_metadata());
  }
 }
 
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
   storage_.discard_and_reallocate(
-      calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
+      calculate_padded_sizes(new_sizes, memory_layout_),
       memory_layout_,
       dtype_);
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
-  // For texture storage check that the current texture is large enough for the
-  // new sizes of the tensor.
   if (storage_type() != api::kBuffer) {
+    // For texture storage check that the current texture is large enough for
+    // the new sizes of the tensor.
    api::utils::uvec3 virtual_extents =
-        create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
+        calculate_texture_limits(gpu_sizes_, memory_layout_);
 
    bool valid_resize = virtual_extents.data[0] <= extents().data[0];
    valid_resize = valid_resize && virtual_extents.data[1] <= extents().data[1];
@@ -403,8 +393,7 @@ vTensorStorage::vTensorStorage(
     const bool allocate_memory)
     : context_(context),
      storage_type_{storage_type},
-      extents_(
-          create_image_extents(gpu_sizes, storage_type, gpu_memory_layout)),
+      extents_(calculate_texture_limits(gpu_sizes, gpu_memory_layout)),
      buffer_length_{api::utils::multiply_integers(gpu_sizes)},
      image_(allocate_image(
          context_,
@@ -496,7 +485,7 @@ void vTensorStorage::discard_and_reallocate(
 
   flush();
 
-  extents_ = create_image_extents(gpu_sizes, storage_type_, gpu_memory_layout);
+  extents_ = calculate_texture_limits(gpu_sizes, gpu_memory_layout);
   image_ = allocate_image(
      context_,
      extents_,
