Skip to content

Commit fbb0acf

Browse files
pytorchbotSS-JIA
andauthored
[ET-VK] Use hashed layout instead of axis map UBO (#6574)
Pull Request resolved: #6534 ## Context #6358 showed that passing in the axis map of a tensor via a specialization constant allows shaders to utilize the axis map in indexing calculations with minimal impact to latency. This diff extends that idea, and introduces the concept of a hashed layout. The hashed layout is a 32-bit integer where: 1. Bits 0-3: `axis_map[0]` 2. Bits 4-7: `axis_map[1]` 3. Bits 8-11: `axis_map[2]` 4. Bits 12-15: `axis_map[3]` 5. Bits 16-19: `packed_dim` 6. Bits 20-31: unused Essentially, the integer is divided into chunks of 4 bits, and each chunk is used to represent a value from the `axis_map` + `packed_dim`. This way, the entire description of how the tensor is represented as a texture can be passed into a compute shader with a single specialization constant. Within the compute shader, the axis map and packed dim can be extracted like so: ``` ${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); const lowp int in_packed_dim = unhash_packed_dim(in_layout); ``` Note that `lowp` can be used because the expected values are limited by the dimensionality of the tensor, so we expect only small values. ## Changes 1. Introduce `hashed_layout` 2. Replace all uses of `axis_map_ubo` with `hashed_layout` 3. Remove `axis_map_ubo` from `vTensor`. This also reduces the size of the class. ghstack-source-id: 250928240 @exported-using-ghexport Differential Revision: [D65085141](https://our.internmc.facebook.com/intern/diff/D65085141/) Co-authored-by: Stephen Jia <[email protected]>
1 parent b07be36 commit fbb0acf

30 files changed

+216
-213
lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,6 @@ vTensor::vTensor(
454454
sizes_uniform_(),
455455
strides_uniform_(),
456456
numel_uniform_(),
457-
axis_map_uniform_(),
458457
logical_limits_uniform_(),
459458
// Construct Tensor storage
460459
storage_(
@@ -501,7 +500,6 @@ vTensor::vTensor(
501500
sizes_uniform_(),
502501
strides_uniform_(),
503502
numel_uniform_(),
504-
axis_map_uniform_(),
505503
logical_limits_uniform_(),
506504
// Construct Tensor storage
507505
storage_(context, image) {
@@ -527,7 +525,6 @@ vTensor::vTensor(vTensor& other)
527525
sizes_uniform_(),
528526
strides_uniform_(),
529527
numel_uniform_(),
530-
axis_map_uniform_(),
531528
logical_limits_uniform_(),
532529
// Copy Tensor storage
533530
storage_(other.storage_) {}
@@ -553,7 +550,6 @@ vTensor::vTensor(
553550
sizes_uniform_(),
554551
strides_uniform_(),
555552
numel_uniform_(),
556-
axis_map_uniform_(),
557553
logical_limits_uniform_(),
558554
// Copy Tensor storage
559555
storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
@@ -630,14 +626,6 @@ const vkapi::BufferBindInfo vTensor::strides_ubo() {
630626
return vkapi::BufferBindInfo(strides_uniform_.buffer());
631627
}
632628

633-
const vkapi::BufferBindInfo vTensor::axis_map_ubo() {
634-
if (!axis_map_uniform_.buffer()) {
635-
axis_map_uniform_ =
636-
ParamsBuffer(storage_.context_, utils::make_ivec4(axis_map_));
637-
}
638-
return vkapi::BufferBindInfo(axis_map_uniform_.buffer());
639-
}
640-
641629
const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
642630
if (!logical_limits_uniform_.buffer()) {
643631
logical_limits_uniform_ = ParamsBuffer(storage_.context_, logical_limits_);
@@ -710,9 +698,6 @@ void vTensor::update_metadata() {
710698
if (numel_uniform_.buffer()) {
711699
numel_uniform_.update(numel_);
712700
}
713-
if (axis_map_uniform_.buffer()) {
714-
axis_map_uniform_.update(utils::make_ivec4(axis_map_));
715-
}
716701
if (logical_limits_uniform_.buffer()) {
717702
logical_limits_uniform_.update(logical_limits_);
718703
}

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,6 @@ class vTensor final {
308308
ParamsBuffer sizes_uniform_;
309309
ParamsBuffer strides_uniform_;
310310
ParamsBuffer numel_uniform_;
311-
ParamsBuffer axis_map_uniform_;
312311
ParamsBuffer logical_limits_uniform_;
313312

314313
vTensorStorage storage_;
@@ -430,6 +429,19 @@ class vTensor final {
430429
return axis_map_;
431430
}
432431

432+
/*
433+
* Returns a single int32_t that contains the values of the axis map and the
434+
* packed dimension packed into a single int32_t, such that it can be used as
435+
* a specialization constant in a compute shader. This allows for the SPIR-V
436+
* to bytecode compilation to perform compile-time unfolding on the axis map.
437+
* Each element of the axis map and the value of the packed dimension take up
438+
* 4 bits in the packed int32_t.
439+
*/
440+
inline int32_t hashed_layout() const {
441+
return axis_map_.at(0) + (axis_map_.at(1) << 4) + (axis_map_.at(2) << 8) +
442+
(axis_map_.at(3) << 12) + (packed_dim_ << 16);
443+
}
444+
433445
/*
434446
* Return true if the tensor's axis map is {0, 1, 2, concat_dim}. This means
435447
* that the width dim is mapped to the width axis of the texture, the height
@@ -463,12 +475,6 @@ class vTensor final {
463475
*/
464476
const vkapi::BufferBindInfo strides_ubo();
465477

466-
/*
467-
* Returns a GPU buffer containing the texture axis mapping for each dimension
468-
* of the tensor, in WHCN dimension order.
469-
*/
470-
const vkapi::BufferBindInfo axis_map_ubo();
471-
472478
/*
473479
* Returns a GPU buffer containing the logical limits of the tensor. See the
474480
* comments for logical_limits() for more context.

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,10 @@ class ComputeGraph final {
318318
return values_.at(idx).toConstTensor().estimate_memory_layout();
319319
}
320320

321+
inline int32_t hashed_layout_of(const ValueRef idx) const {
322+
return values_.at(idx).toConstTensor().hashed_layout();
323+
}
324+
321325
inline int32_t packed_dim_of(const ValueRef idx) const {
322326
return values_.at(idx).toConstTensor().packed_dim();
323327
}
@@ -338,10 +342,6 @@ class ComputeGraph final {
338342
return values_.at(idx).toTensor().numel_ubo();
339343
}
340344

341-
inline vkapi::BufferBindInfo axis_map_ubo(const ValueRef idx) {
342-
return values_.at(idx).toTensor().axis_map_ubo();
343-
}
344-
345345
inline bool has_standard_axis_map(const ValueRef idx) {
346346
return values_.at(idx).toTensor().has_standard_axis_map();
347347
}

backends/vulkan/runtime/graph/ops/glsl/addmm_naive_texture3d.glsl

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,39 @@ $if MAT2_IS_TRANSPOSED:
1616
$if HAS_BIAS:
1717
#define HAS_BIAS
1818

19-
#include "indexing_utils.h"
20-
2119
${layout_declare_tensor(B, "w", "out_tensor", DTYPE, "texture3d")}
2220
${layout_declare_tensor(B, "r", "mat1_tensor", DTYPE, "texture3d")}
2321
${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")}
2422
$if HAS_BIAS:
2523
${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")}
2624
${layout_declare_ubo(B, "ivec4", "out_sizes")}
2725
${layout_declare_ubo(B, "ivec3", "out_limits")}
28-
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
2926
${layout_declare_ubo(B, "ivec4", "mat1_sizes")}
30-
${layout_declare_ubo(B, "ivec4", "mat1_axis_map")}
3127
${layout_declare_ubo(B, "ivec4", "mat2_sizes")}
32-
${layout_declare_ubo(B, "ivec4", "mat2_axis_map")}
3328
$if HAS_BIAS:
3429
${layout_declare_ubo(B, "ivec4", "bias_sizes")}
35-
${layout_declare_ubo(B, "ivec4", "bias_axis_map")}
3630
${layout_declare_ubo(B, "float", "alpha", "float", "beta")}
3731

32+
#include "indexing_utils.h"
33+
3834
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3935

40-
layout(constant_id = 3) const int out_packed_dim = C_DIM;
41-
layout(constant_id = 4) const int mat1_packed_dim = W_DIM;
42-
layout(constant_id = 5) const int mat2_packed_dim = H_DIM;
43-
layout(constant_id = 6) const int bias_packed_dim = W_DIM;
36+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
37+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
38+
const lowp int out_packed_dim = unhash_packed_dim(out_layout);
39+
40+
${layout_declare_spec_const(C, "int", "mat1_layout", "DEFAULT_LAYOUT")}
41+
const lowp ivec4 mat1_axis_map = unhash_axis_map(mat1_layout);
42+
const lowp int mat1_packed_dim = unhash_packed_dim(mat1_layout);
43+
44+
${layout_declare_spec_const(C, "int", "mat2_layout", "DEFAULT_LAYOUT")}
45+
const lowp ivec4 mat2_axis_map = unhash_axis_map(mat2_layout);
46+
const lowp int mat2_packed_dim = unhash_packed_dim(mat2_layout);
47+
48+
$if HAS_BIAS:
49+
${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")}
50+
const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout);
51+
const lowp int bias_packed_dim = unhash_packed_dim(bias_layout);
4452

4553
#ifdef HAS_BIAS
4654
vec4 get_bias_texel_W_packed(ivec3 logical_pos) {

backends/vulkan/runtime/graph/ops/glsl/addmm_optimized.glsl

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,35 @@ $if BATCH_MODE:
1919
$if HAS_BIAS:
2020
#define HAS_BIAS
2121

22-
#include "indexing_utils.h"
23-
2422
${layout_declare_tensor(B, "w", "out_tensor", DTYPE, "texture3d")}
2523
${layout_declare_tensor(B, "r", "mat1_tensor", DTYPE, "texture3d")}
2624
${layout_declare_tensor(B, "r", "mat2_tensor", DTYPE, "texture3d")}
2725
$if HAS_BIAS:
2826
${layout_declare_tensor(B, "r", "bias_tensor", DTYPE, "texture3d")}
2927
${layout_declare_ubo(B, "ivec4", "out_sizes")}
30-
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
3128
${layout_declare_ubo(B, "ivec4", "mat1_sizes")}
32-
${layout_declare_ubo(B, "ivec4", "mat1_axis_map")}
3329
${layout_declare_ubo(B, "ivec4", "mat2_sizes")}
34-
${layout_declare_ubo(B, "ivec4", "mat2_axis_map")}
3530
$if HAS_BIAS:
3631
${layout_declare_ubo(B, "ivec4", "bias_sizes")}
37-
${layout_declare_ubo(B, "ivec4", "bias_axis_map")}
3832
${layout_declare_ubo(B, "float", "alpha", "float", "beta")}
3933

34+
#include "indexing_utils.h"
35+
4036
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
4137

42-
layout(constant_id = 3) const int out_packed_dim = C_DIM;
38+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
39+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
40+
const lowp int out_packed_dim = unhash_packed_dim(out_layout);
41+
42+
${layout_declare_spec_const(C, "int", "mat1_layout", "DEFAULT_LAYOUT")}
43+
const lowp ivec4 mat1_axis_map = unhash_axis_map(mat1_layout);
44+
45+
${layout_declare_spec_const(C, "int", "mat2_layout", "DEFAULT_LAYOUT")}
46+
const lowp ivec4 mat2_axis_map = unhash_axis_map(mat2_layout);
47+
48+
$if HAS_BIAS:
49+
${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")}
50+
const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout);
4351

4452
// To convince the SPIR-V compiler to unroll the loops optimally, need this
4553
// macro

backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,31 @@
1414

1515
#define op(X, Y, A) ${OPERATOR}
1616

17-
#include "broadcasting_utils.h"
18-
#include "indexing_utils.h"
19-
2017
layout(std430) buffer;
2118

2219
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
2320
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
2421
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
2522
${layout_declare_ubo(B, "ivec4", "out_sizes")}
26-
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
2723
${layout_declare_ubo(B, "ivec4", "in_sizes")}
28-
${layout_declare_ubo(B, "ivec4", "in_axis_map")}
2924
${layout_declare_ubo(B, "ivec4", "other_sizes")}
30-
${layout_declare_ubo(B, "ivec4", "other_axis_map")}
3125
${layout_declare_ubo(B, "ivec2", "broadcast_params")}
3226
${layout_declare_ubo(B, "float", "alpha")}
3327

28+
#include "broadcasting_utils.h"
29+
#include "indexing_utils.h"
30+
3431
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3532

36-
layout(constant_id = 3) const int packed_dim = C_DIM;
33+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
34+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
35+
const lowp int packed_dim = unhash_packed_dim(out_layout);
36+
37+
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
38+
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
39+
40+
${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")}
41+
const lowp ivec4 other_axis_map = unhash_axis_map(other_layout);
3742

3843
void main() {
3944
const ivec3 lpos = ivec3(gl_GlobalInvocationID);

backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ layout(std430) buffer;
2121
${layout_declare_buffer(B, "w", "nchw_out", "int")}
2222
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
2323
${layout_declare_ubo(B, "ivec4", "tensor_sizes")}
24-
${layout_declare_ubo(B, "ivec4", "axis_map")}
2524
${layout_declare_ubo(B, "int", "out_numel")}
2625

2726
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2827

29-
layout(constant_id = 3) const int packed_dim = C_DIM;
28+
${layout_declare_spec_const(C, "int", "t_layout", "DEFAULT_LAYOUT")}
29+
const lowp ivec4 axis_map = unhash_axis_map(t_layout);
30+
const lowp int packed_dim = unhash_packed_dim(t_layout);
3031

3132
void main() {
3233
const int out_buf_idx = int(gl_GlobalInvocationID.x);

backends/vulkan/runtime/graph/ops/glsl/conv1d.glsl

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414

1515
#define op(X, A, B) ${OPERATOR}
1616

17-
#include "indexing_utils.h"
18-
1917
layout(std430) buffer;
2018

2119
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
@@ -26,17 +24,26 @@ ${layout_declare_tensor(B, "r", "bias_in", DTYPE, STORAGE)}
2624
${layout_declare_ubo(B, "ivec3", "out_limits")}
2725
${layout_declare_ubo(B, "ivec4", "in_sizes")}
2826

29-
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
30-
${layout_declare_ubo(B, "ivec4", "in_axis_map")}
31-
${layout_declare_ubo(B, "ivec4", "kernel_axis_map")}
32-
${layout_declare_ubo(B, "ivec4", "bias_axis_map")}
33-
3427
${layout_declare_ubo(B,"int", "kernel_size", "int", "stride", "int", "padding", "int", "dilation", "int", "in_group_size", "int", "out_group_size")}
3528

3629
${layout_declare_ubo(B, "float", "out_min", "float", "out_max")}
3730

31+
#include "indexing_utils.h"
32+
3833
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3934

35+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
36+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
37+
38+
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
39+
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
40+
41+
${layout_declare_spec_const(C, "int", "kernel_layout", "DEFAULT_LAYOUT")}
42+
const lowp ivec4 kernel_axis_map = unhash_axis_map(kernel_layout);
43+
44+
${layout_declare_spec_const(C, "int", "bias_layout", "DEFAULT_LAYOUT")}
45+
const lowp ivec4 bias_axis_map = unhash_axis_map(bias_layout);
46+
4047
// Let us define
4148
//
4249
// input = (N, in_C, in_L),

backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,14 @@
1414

1515
layout(std430) buffer;
1616

17-
#include "indexing_utils.h"
17+
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
18+
${layout_declare_tensor(B, "r", "existing_out", DTYPE, STORAGE)}
19+
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
1820

19-
${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
20-
${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)}
21-
${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)}
21+
${layout_declare_ubo(B, "ivec4", "out_sizes")}
22+
${layout_declare_ubo(B, "ivec4", "in_sizes")}
2223

23-
${layout_declare_ubo(3, "ivec4", "out_sizes")}
24-
${layout_declare_ubo(4, "ivec4", "out_axis_map")}
25-
${layout_declare_ubo(5, "ivec4", "in_sizes")}
26-
${layout_declare_ubo(6, "ivec4", "in_axis_map")}
27-
layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
24+
layout(set = 0, binding = 5) uniform PRECISION restrict CopyArgs {
2825
// Operates on (x, y, z) logical extents.
2926
ivec3 range;
3027
// Analogous to range variable in copy. It defines the number of channels being
@@ -35,9 +32,16 @@ layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
3532
int src_channel_offset;
3633
};
3734

35+
#include "indexing_utils.h"
36+
3837
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3938

40-
layout(constant_id = 3) const int packed_dim = C_DIM;
39+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
40+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
41+
const lowp int packed_dim = unhash_packed_dim(out_layout);
42+
43+
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
44+
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
4145

4246
void main() {
4347
// Note: Unlike other shaders, the range is often not equal to the destination

backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,23 @@
1212

1313
${define_active_storage_type(STORAGE)}
1414

15-
#include "indexing_utils.h"
16-
1715
layout(std430) buffer;
1816

1917
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
2018
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
2119

2220
${layout_declare_ubo(B, "ivec3", "range", "ivec3", "src_offset", "ivec3", "dst_offset")}
23-
${layout_declare_ubo(B, "ivec4", "out_axis_map")}
24-
${layout_declare_ubo(B, "ivec4", "in_axis_map")}
21+
22+
#include "indexing_utils.h"
2523

2624
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2725

26+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
27+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
28+
29+
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
30+
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
31+
2832
void main() {
2933
const ivec3 pos = ivec3(gl_GlobalInvocationID);
3034

0 commit comments

Comments
 (0)