
Commit a6bab95

SS-JIA authored and facebook-github-bot committed
Add support for buffer storage tensors (#3684)
Summary: Pull Request resolved: #3684

## Context

Add support for tensors that use buffer storage, in preparation for quantization support. The initial versions of the quantized operators will target buffer-based tensors, because the primary use case is LLMs, which may contain tensors that exceed the texture limits.

Differential Revision: D57577019
1 parent ab1c8aa commit a6bab95

30 files changed: +1121 −426 lines
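For context on the texture limits mentioned in the summary: 3D image textures are bounded per axis by VkPhysicalDeviceLimits::maxImageDimension3D (typically a few thousand texels), while storage buffers are bounded by the much larger maxStorageBufferRange. The following standalone sketch is not part of this commit; it only queries those two limits through the plain Vulkan C API to illustrate why very large tensors need buffer storage. Which limits ultimately matter for the ExecuTorch texture path is an assumption here.

// Standalone sketch: query the device limits that motivate buffer storage.
// Assumes a Vulkan loader/driver is installed; not part of the ExecuTorch code.
#include <vulkan/vulkan.h>

#include <cstdio>
#include <vector>

int main() {
  VkApplicationInfo app_info{VK_STRUCTURE_TYPE_APPLICATION_INFO};
  app_info.apiVersion = VK_API_VERSION_1_1;

  VkInstanceCreateInfo instance_info{VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO};
  instance_info.pApplicationInfo = &app_info;

  VkInstance instance = VK_NULL_HANDLE;
  if (vkCreateInstance(&instance_info, nullptr, &instance) != VK_SUCCESS) {
    return 1;
  }

  uint32_t count = 0;
  vkEnumeratePhysicalDevices(instance, &count, nullptr);
  std::vector<VkPhysicalDevice> devices(count);
  vkEnumeratePhysicalDevices(instance, &count, devices.data());

  for (VkPhysicalDevice device : devices) {
    VkPhysicalDeviceProperties props{};
    vkGetPhysicalDeviceProperties(device, &props);
    // A 3D texture cannot exceed maxImageDimension3D along any axis, while a
    // storage buffer can span up to maxStorageBufferRange bytes.
    std::printf(
        "%s: maxImageDimension3D = %u, maxStorageBufferRange = %u bytes\n",
        props.deviceName,
        props.limits.maxImageDimension3D,
        props.limits.maxStorageBufferRange);
  }

  vkDestroyInstance(instance, nullptr);
  return 0;
}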

backends/vulkan/runtime/api/Context.cpp

Lines changed: 5 additions & 0 deletions
@@ -250,5 +250,10 @@ ParamsBindList::ParamsBindList(
   std::copy(init_list.begin(), init_list.end(), bind_infos.begin());
 }
 
+void ParamsBindList::append(const ParamsBindList& other) {
+  bind_infos.insert(
+      bind_infos.end(), other.bind_infos.begin(), other.bind_infos.end());
+}
+
 } // namespace api
 } // namespace vkcompute

backends/vulkan/runtime/api/Context.h

Lines changed: 2 additions & 0 deletions
@@ -270,6 +270,8 @@ struct ParamsBindList final {
   std::vector<BufferBindInfo> bind_infos;
 
   ParamsBindList(std::initializer_list<const BufferBindInfo> init_list);
+
+  void append(const ParamsBindList& other);
 };
 
 class StorageBuffer final {
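The new append() method simply concatenates another list's bind_infos onto this one, so parameter lists built for a buffer-backed tensor can be combined with others before dispatch. A minimal standalone sketch of the same pattern, using a stand-in BindInfo struct in place of the real api::BufferBindInfo (which wraps a live Vulkan buffer):

// Standalone sketch of ParamsBindList::append semantics; BindInfo is a stand-in.
#include <cstdio>
#include <initializer_list>
#include <vector>

struct BindInfo {
  int id;
};

struct ParamsBindList {
  std::vector<BindInfo> bind_infos;

  ParamsBindList(std::initializer_list<BindInfo> init_list)
      : bind_infos(init_list) {}

  // Same semantics as the method added in this commit: append the other
  // list's entries to the end of this list.
  void append(const ParamsBindList& other) {
    bind_infos.insert(
        bind_infos.end(), other.bind_infos.begin(), other.bind_infos.end());
  }
};

int main() {
  ParamsBindList a({{1}, {2}});
  ParamsBindList b({{3}});
  a.append(b);
  for (const BindInfo& info : a.bind_infos) {
    std::printf("%d\n", info.id); // prints 1, 2, 3
  }
  return 0;
}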

backends/vulkan/runtime/api/Tensor.cpp

Lines changed: 103 additions & 136 deletions
@@ -11,117 +11,87 @@
 
 namespace vkcompute {
 
-namespace {
-
-/*
- * When stored on the GPU, one dimension will be aligned to the next multiple of
- * 4 in order to take advantage of vec4 data types. The dimension that is
- * packed is denoted by the GPUMemoryLayout. This function adjusts one of
- * the dimensions based on the desired memory format and storage type and
- * returns a sizes array describing the dimensions of the memory used to store
- * the tensor data on the GPU.
- */
-std::vector<int64_t> calc_gpu_sizes(
+std::vector<int64_t> calculate_strides(
     const std::vector<int64_t>& sizes,
     const api::GPUMemoryLayout memory_layout,
-    const api::StorageType storage_type) {
-  std::vector<int64_t> gpu_sizes;
-  if (storage_type == api::kBuffer) {
-    gpu_sizes.resize(sizes.size());
-    for (size_t i = 0; i < sizes.size(); i++) {
-      gpu_sizes.at(i) = sizes.at(i);
+    const bool texel_strides) {
+  const int64_t dim_offset = api::to_dim_offset<int64_t>(memory_layout);
+  const int64_t last_dim = sizes.size() - dim_offset;
+  VK_CHECK_COND(last_dim >= 0);
+
+  size_t ndim = sizes.size();
+  std::vector<int64_t> strides(ndim);
+
+  const int64_t last_dim_size = texel_strides
+      ? api::utils::div_up(sizes.at(last_dim), INT64_C(4))
+      : sizes.at(last_dim);
+
+  for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) {
+    strides.at(stride_d) = 1;
+    if (stride_d == last_dim) {
+      continue;
     }
-  }
-  // For texture storage, tensors are typically stored using 3D image textures.
-  // Batches are stacked along the depth dimension. To represent the physical
-  // 3 dimensionality of the image texture (with concatenated batches) GPU sizes
-  // will be fixed to 4 dimensions when using texture storage.
-  else {
-    VK_CHECK_COND(
-        sizes.size() >= 0 && sizes.size() <= 4,
-        "Texture storage only valid for 0 <= ndim <= 4, received: ",
-        sizes.size());
-
-    gpu_sizes.resize(4);
-    gpu_sizes.at(0) = api::utils::val_at(-4, sizes);
-    gpu_sizes.at(1) = api::utils::val_at(-3, sizes);
-    gpu_sizes.at(2) = api::utils::val_at(-2, sizes);
-    gpu_sizes.at(3) = api::utils::val_at(-1, sizes);
-  }
-
-  size_t ndim = gpu_sizes.size();
-  switch (memory_layout) {
-    case api::kWidthPacked:
-      if (ndim >= 1) {
-        gpu_sizes.at(ndim - 1) =
-            api::utils::align_up(api::utils::val_at(-1, sizes), INT64_C(4));
+    strides.at(stride_d) = last_dim_size;
+    for (int size_d = ndim - 1; size_d > stride_d; size_d--) {
+      if (size_d != last_dim) {
+        strides.at(stride_d) *= sizes.at(size_d);
       }
-      break;
+    }
+  }
+  return strides;
+}
 
-    case api::kHeightPacked:
-      if (ndim >= 2) {
-        gpu_sizes.at(ndim - 2) =
-            api::utils::align_up(api::utils::val_at(-2, sizes), INT64_C(4));
-      }
-      break;
+std::vector<int64_t> calculate_padded_sizes(
+    const std::vector<int64_t>& sizes,
+    const api::GPUMemoryLayout memory_layout) {
+  int64_t ndim = sizes.size();
+  if (ndim == 0) {
+    ndim = 1;
+  }
 
-    case api::kChannelsPacked:
-      if (ndim >= 3) {
-        gpu_sizes.at(ndim - 3) =
-            api::utils::align_up(api::utils::val_at(-3, sizes), INT64_C(4));
-      }
-      break;
+  // Tensor sizes will be unsqueezed up to the next multiple of 4
+  const int64_t ndim_up4 = api::utils::align_up(ndim, INT64_C(4));
+  std::vector<int64_t> gpu_sizes(ndim_up4);
+  for (int64_t i = 0; i < ndim_up4; ++i) {
+    gpu_sizes.at(i) = api::utils::val_at(i - ndim_up4, sizes);
   }
 
+  // Pad the packed dim to the next multiple of 4.
+  const int64_t dim_offset = api::to_dim_offset<int64_t>(memory_layout);
+  const int64_t padded_dim_size = api::utils::val_at(-dim_offset, sizes);
+  gpu_sizes.at(ndim_up4 - dim_offset) =
+      api::utils::align_up(padded_dim_size, INT64_C(4));
+
   return gpu_sizes;
 }
 
-/*
- * Creates a uvec3 denoting the extents of the image texture that will be
- * created to store a tensor of a given size.
- */
-api::utils::uvec3 create_image_extents(
+api::utils::uvec3 calculate_texture_limits(
     const std::vector<int64_t>& gpu_sizes,
-    const api::StorageType storage_type,
     const api::GPUMemoryLayout memory_layout) {
-  size_t ndim = gpu_sizes.size();
+  VK_CHECK_COND(gpu_sizes.size() == 4);
 
-  if (storage_type == api::kBuffer) {
-    // image extents do not apply to buffer storage
-    return {0u, 0u, 0u};
-  } else {
-    VK_CHECK_COND(
-        ndim >= 1 && ndim <= 4,
-        "Texture storage only valid for 1 <= ndim <= 4!");
-
-    using namespace api::utils;
-    uint32_t width = safe_downcast<uint32_t>(val_at(-1, gpu_sizes));
-    uint32_t height = safe_downcast<uint32_t>(val_at(-2, gpu_sizes));
-    uint32_t channels = safe_downcast<uint32_t>(val_at(-3, gpu_sizes));
-    uint32_t batch = safe_downcast<uint32_t>(val_at(-4, gpu_sizes));
-
-    switch (memory_layout) {
-      case api::kWidthPacked:
-        VK_CHECK_COND(width % 4 == 0, "Width must be divisible by 4!");
-        width /= 4;
-        break;
-      case api::kHeightPacked:
-        VK_CHECK_COND(height % 4 == 0, "Height must be divisible by 4!");
-        height /= 4;
-        break;
-      case api::kChannelsPacked:
-        VK_CHECK_COND(channels % 4 == 0, "Channels must be divisible by 4!");
-        channels /= 4;
-        break;
-      default:
-        VK_THROW("Invalid memory format used!");
-    }
+  uint32_t N = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(0));
+  uint32_t C = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(1));
+  uint32_t H = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(2));
+  uint32_t W = api::utils::safe_downcast<uint32_t>(gpu_sizes.at(3));
 
-    return {width, height, batch * channels};
+  switch (memory_layout) {
+    case api::kWidthPacked:
+      VK_CHECK_COND(W % 4 == 0);
+      W /= 4;
+      break;
+    case api::kHeightPacked:
+      VK_CHECK_COND(H % 4 == 0);
+      H /= 4;
+      break;
+    case api::kChannelsPacked:
+      VK_CHECK_COND(C % 4 == 0);
+      C /= 4;
+      break;
   }
-  }
 
-} // namespace
+  return {W, H, C * N};
+}
 
 //
 // vTensor
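A note on the new helpers above: calculate_strides treats the packed dimension as the innermost (stride 1) dimension and, when texel_strides is set, counts in texels of 4 elements; calculate_padded_sizes unsqueezes the sizes to the next multiple of 4 dimensions and pads the packed dimension to a multiple of 4. The standalone sketch below re-implements that logic with local stand-ins for api::utils::div_up, align_up, and val_at (assumed here to behave as their names suggest) and prints the results for a width-packed 2x3x5 tensor. It is an illustration of the math, not the ExecuTorch API.

// Standalone illustration of the stride / padded-size math above.
// div_up, align_up, and val_at are local stand-ins for api::utils helpers.
#include <cstdint>
#include <cstdio>
#include <vector>

int64_t div_up(int64_t a, int64_t b) { return (a + b - 1) / b; }
int64_t align_up(int64_t a, int64_t m) { return div_up(a, m) * m; }

// val_at(-i, sizes): i-th size from the end, or 1 if the tensor has fewer dims.
int64_t val_at(int64_t neg_idx, const std::vector<int64_t>& sizes) {
  int64_t idx = static_cast<int64_t>(sizes.size()) + neg_idx;
  return idx >= 0 ? sizes.at(idx) : 1;
}

// dim_offset: 1 for width-packed, 2 for height-packed, 3 for channels-packed.
std::vector<int64_t> calculate_strides(
    const std::vector<int64_t>& sizes, int64_t dim_offset, bool texel_strides) {
  const int64_t ndim = sizes.size();
  const int64_t last_dim = ndim - dim_offset; // index of the packed dim
  const int64_t last_dim_size =
      texel_strides ? div_up(sizes.at(last_dim), 4) : sizes.at(last_dim);

  std::vector<int64_t> strides(ndim);
  for (int64_t stride_d = ndim - 1; stride_d >= 0; stride_d--) {
    strides.at(stride_d) = 1;
    if (stride_d == last_dim) {
      continue; // the packed dim is the innermost dim, stride 1
    }
    strides.at(stride_d) = last_dim_size;
    for (int64_t size_d = ndim - 1; size_d > stride_d; size_d--) {
      if (size_d != last_dim) {
        strides.at(stride_d) *= sizes.at(size_d);
      }
    }
  }
  return strides;
}

std::vector<int64_t> calculate_padded_sizes(
    const std::vector<int64_t>& sizes, int64_t dim_offset) {
  const int64_t ndim = sizes.empty() ? 1 : static_cast<int64_t>(sizes.size());
  const int64_t ndim_up4 = align_up(ndim, 4); // unsqueeze to a multiple of 4 dims
  std::vector<int64_t> gpu_sizes(ndim_up4);
  for (int64_t i = 0; i < ndim_up4; ++i) {
    gpu_sizes.at(i) = val_at(i - ndim_up4, sizes);
  }
  // Pad the packed dim to the next multiple of 4.
  gpu_sizes.at(ndim_up4 - dim_offset) = align_up(val_at(-dim_offset, sizes), 4);
  return gpu_sizes;
}

int main() {
  const std::vector<int64_t> sizes = {2, 3, 5}; // C, H, W
  const int64_t width_packed = 1; // dim_offset for a width-packed layout

  // Padded (GPU) sizes: {1, 2, 3, 8} -- unsqueezed to 4 dims, W padded 5 -> 8.
  const std::vector<int64_t> padded = calculate_padded_sizes(sizes, width_packed);
  for (int64_t s : padded) {
    std::printf("%lld ", static_cast<long long>(s));
  }
  std::printf("\n");

  // Texel strides over the padded sizes, as strides_ubo() would compute them
  // (before make_whcn_ivec4 reorders to WHCN): {12, 6, 2, 1}, since the padded
  // W of 8 packs into 8 / 4 = 2 texels per row.
  for (int64_t s : calculate_strides(padded, width_packed, /*texel_strides=*/true)) {
    std::printf("%lld ", static_cast<long long>(s));
  }
  std::printf("\n");
  return 0;
}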
@@ -138,12 +108,13 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate sizes and strides
       sizes_(sizes.begin(), sizes.end()),
-      gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
+      gpu_sizes_{calculate_padded_sizes(sizes, memory_layout_)},
       texture_limits_{{0, 0, 0}},
       // Utility Uniform Buffers that can be passed to shaders as arguments
       sizes_uniform_(),
       texture_limits_uniform_(),
-      packed_dim_meta_(),
+      strides_uniform_(),
+      ntexels_uniform_(),
       // Construct Tensor storage
       storage_(
           context,
@@ -213,28 +184,22 @@ const api::BufferBindInfo vTensor::texture_limits_ubo() {
   return api::BufferBindInfo(texture_limits_uniform_.buffer());
 }
 
-vTensor::PackedDimMeta vTensor::make_packed_dim_metadata() const {
-  int64_t packed_dim = gpu_memory_layout_int();
-  int32_t dim_size = api::utils::val_at(-(packed_dim + 1), sizes_);
-  int32_t dim_size_padded = api::utils::val_at(-(packed_dim + 1), gpu_sizes_);
-  int32_t dim_texel_len =
-      api::utils::safe_downcast<int32_t>(extents().data[packed_dim]);
-  int32_t padding = dim_size_padded - dim_size;
-
-  return {
-      dim_size,
-      dim_size_padded,
-      dim_texel_len,
-      padding,
-  };
+const api::BufferBindInfo vTensor::strides_ubo() {
+  if (!strides_uniform_.buffer()) {
+    strides_uniform_ = api::UniformParamsBuffer(
+        storage_.context_,
+        api::utils::make_whcn_ivec4(
+            calculate_strides(gpu_sizes_, memory_layout_)));
+  }
+  return api::BufferBindInfo(strides_uniform_.buffer());
 }
 
-const api::BufferBindInfo vTensor::packed_dim_meta_ubo() {
-  if (!packed_dim_meta_.buffer()) {
-    packed_dim_meta_ =
-        api::UniformParamsBuffer(storage_.context_, make_packed_dim_metadata());
+const api::BufferBindInfo vTensor::ntexels_ubo() {
+  if (!ntexels_uniform_.buffer()) {
+    ntexels_uniform_ =
+        api::UniformParamsBuffer(storage_.context_, texel_numel());
   }
-  return api::BufferBindInfo(packed_dim_meta_.buffer());
+  return api::BufferBindInfo(ntexels_uniform_.buffer());
 }
 
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
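A note on the accessors above: strides_ubo() and ntexels_ubo() follow the same lazy pattern as the existing UBO accessors, creating the uniform buffer only on first request and letting update_size_metadata() refresh it only if it already exists, so shaders that never bind this metadata pay nothing. A small standalone sketch of that create-on-first-use pattern, with a stand-in ParamsBuffer type in place of api::UniformParamsBuffer and contiguous strides used purely for illustration:

// Standalone sketch of the create-on-first-use pattern used by the *_ubo()
// accessors. ParamsBuffer is a stand-in for api::UniformParamsBuffer.
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

struct ParamsBuffer {
  std::vector<int> data;
  void update(std::vector<int> new_data) { data = std::move(new_data); }
};

class Tensor {
 public:
  // Created only when a shader actually binds this metadata.
  const ParamsBuffer& strides_ubo() {
    if (!strides_uniform_.has_value()) {
      strides_uniform_ = ParamsBuffer{compute_strides()};
    }
    return *strides_uniform_;
  }

  // Refresh the buffer only if it was ever created.
  void update_size_metadata(std::vector<int> new_sizes) {
    sizes_ = std::move(new_sizes);
    if (strides_uniform_.has_value()) {
      strides_uniform_->update(compute_strides());
    }
  }

 private:
  std::vector<int> compute_strides() const {
    std::vector<int> strides(sizes_.size(), 1);
    for (int d = static_cast<int>(sizes_.size()) - 2; d >= 0; --d) {
      strides[d] = strides[d + 1] * sizes_[d + 1];
    }
    return strides;
  }

  std::vector<int> sizes_{2, 3, 4};
  std::optional<ParamsBuffer> strides_uniform_;
};

int main() {
  Tensor t;
  t.update_size_metadata({4, 5, 6}); // no UBO exists yet, nothing allocated
  const ParamsBuffer& ubo = t.strides_ubo(); // created on first use: {30, 6, 1}
  std::printf("%zu entries, first stride = %d\n", ubo.data.size(), ubo.data[0]);
  t.update_size_metadata({2, 2, 2}); // now the existing UBO is refreshed
  return 0;
}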
@@ -273,45 +238,48 @@ void vTensor::bind_allocation(const api::Allocation& allocation) {
 
 void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
-  gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
+  gpu_sizes_ = calculate_padded_sizes(sizes_, memory_layout_);
 
-  if (storage_type() != api::kBuffer) {
-    // Calculate the extents of the image texture that would have been required
-    // for a tensor of the new sizes.
-    api::utils::uvec3 virtual_extents =
-        create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
-    // Update the texture limits to reflect the new virtual extents.
-    texture_limits_.limits = api::utils::ivec3{
-        api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
-        api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
-        api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
-  }
+  // Calculate the extents of the image texture that would have been required
+  // for a tensor of the new sizes.
+  api::utils::uvec3 virtual_extents =
+      calculate_texture_limits(gpu_sizes_, memory_layout_);
+
+  // Update the texture limits to reflect the new virtual extents.
+  texture_limits_.limits = api::utils::ivec3{
+      api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
+      api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
+      api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
 
   if (sizes_uniform_.buffer()) {
     sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
   }
   if (texture_limits_uniform_.buffer()) {
     texture_limits_uniform_.update(texture_limits_);
   }
-  if (packed_dim_meta_.buffer()) {
-    packed_dim_meta_.update(make_packed_dim_metadata());
+  if (strides_uniform_.buffer()) {
+    strides_uniform_.update(api::utils::make_whcn_ivec4(
+        calculate_strides(gpu_sizes_, memory_layout_)));
+  }
+  if (ntexels_uniform_.buffer()) {
+    ntexels_uniform_.update(texel_numel());
   }
 }
 
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
   storage_.discard_and_reallocate(
-      calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
+      calculate_padded_sizes(new_sizes, memory_layout_),
       memory_layout_,
       dtype_);
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
-  // For texture storage check that the current texture is large enough for the
-  // new sizes of the tensor.
   if (storage_type() != api::kBuffer) {
+    // For texture storage check that the current texture is large enough for
+    // the new sizes of the tensor.
     api::utils::uvec3 virtual_extents =
-        create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
+        calculate_texture_limits(gpu_sizes_, memory_layout_);
 
     bool valid_resize = virtual_extents.data[0] <= extents().data[0];
     valid_resize = valid_resize && virtual_extents.data[1] <= extents().data[1];
@@ -403,8 +371,7 @@ vTensorStorage::vTensorStorage(
     const bool allocate_memory)
     : context_(context),
       storage_type_{storage_type},
-      extents_(
-          create_image_extents(gpu_sizes, storage_type, gpu_memory_layout)),
+      extents_(calculate_texture_limits(gpu_sizes, gpu_memory_layout)),
       buffer_length_{api::utils::multiply_integers(gpu_sizes)},
       image_(allocate_image(
           context_,
@@ -496,7 +463,7 @@ void vTensorStorage::discard_and_reallocate(
 
   flush();
 
-  extents_ = create_image_extents(gpu_sizes, storage_type_, gpu_memory_layout);
+  extents_ = calculate_texture_limits(gpu_sizes, gpu_memory_layout);
   image_ = allocate_image(
       context_,
       extents_,
