Skip to content

Commit bd11b11

Browse files
committed
[ET-VK] Introduce virtual_clone API to support view of view use cases + fix synchronization hazard with view tensors
## Context This diff fixes some hazards (not necessarily) bugs with view tensors. ### `virtual_clone` API Consider the following sequence of calls which may be common in the view of view use case. ``` t1 = graph.add_tensor(...); // t2 will have the same metadata as t1 t2 = graph.add_tensor_view(t1); // t3 will also have the same metadata as t2 at this point. t3 = graph.add_tensor_view(t2); // t2 metadata will be updated correctly. t2 = add_transpose_view_node(t1, 0, 1, t2); // Unfortunately, this node will have an assumption that t3 has the same metadata as t2 to start. However, this is not true. // As a result, t3 will have incorrect metadata after this node. t3 = add_transpose_view_node(t2, 1, 2, t3); ``` To address this, the `virtual_clone` API is introduced which will allow view nodes to set the metadata of the output equal to the input before modifying the output. ### WAW synchronization hazards `vTensorStorage` maintains a `last_access` state which facilitates inserting the correct memory barriers for the underlying `vkImage` or `vkBuffer`. However, when we create a tensor view, `last_access` is not shared between `vTensor` instances that use the same resource. As as result, writing into a `vTensor` will not update the `last_access` of its views, and vice versa. Therefore, sebsequent accesses of the other tensor that references the same resource will result in a synchronization hazard. This diff fixes this hazard in a bit of a crude way; if the `vTensor` is a copy, or has copies, then cowardly assume that it has been written to before the current access so that appropriate memory barriers are inserted. This was the selected solution because I thought that adding a map to track last access of tensors that share resources is a bit overkill when the assumption that the underlying resource has been written to before the current access should hold most of the time. Differential Revision: [D63642092](https://our.internmc.facebook.com/intern/diff/D63642092/) [ghstack-poisoned]
1 parent fe0e676 commit bd11b11

File tree

3 files changed

+56
-8
lines changed

3 files changed

+56
-8
lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -277,10 +277,11 @@ vTensorStorage::vTensorStorage(
277277
storage_type_,
278278
dtype,
279279
allocate_memory)),
280-
last_access_{} {}
280+
last_access_{},
281+
has_copies_{false} {}
281282

282283
vTensorStorage::vTensorStorage(
283-
const vTensorStorage& other,
284+
vTensorStorage& other,
284285
const int64_t buffer_offset)
285286
: context_(other.context_),
286287
storage_type_{other.storage_type_},
@@ -289,7 +290,10 @@ vTensorStorage::vTensorStorage(
289290
buffer_offset_{buffer_offset},
290291
image_(other.image_),
291292
buffer_(other.buffer_, buffer_offset),
292-
last_access_{other.last_access_} {}
293+
last_access_{other.last_access_},
294+
has_copies_{false} {
295+
other.has_copies_ = true;
296+
}
293297

294298
vTensorStorage::~vTensorStorage() {
295299
flush();
@@ -312,6 +316,21 @@ void vTensorStorage::transition(
312316
vkapi::PipelineStageFlags prev_stage = last_access_.stage;
313317
vkapi::MemoryAccessFlags prev_access = last_access_.access;
314318

319+
// If the underlying resource is a copy of another tensor's resource the
320+
// last_access may not be accurate, since the original storage may have been
321+
// written to as part of the original tensor. Likewise, if the underlying
322+
// resource has copies, then the resource may have been updated as part of the
323+
// view tensors.
324+
//
325+
// If the resource is a copy, or has copies of it, then cowardly assume that
326+
// it has previously been written to as part of a compute shader before the
327+
// current access event so that the appropriate memory barriers may be
328+
// inserted.
329+
if (is_copy() || has_copies_) {
330+
prev_stage = vkapi::PipelineStage::COMPUTE;
331+
prev_access = vkapi::kWrite;
332+
}
333+
315334
const bool prev_written = (prev_access & vkapi::MemoryAccessType::WRITE) != 0;
316335

317336
VkImageLayout cur_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -358,6 +377,13 @@ void vTensorStorage::transition(
358377
last_access_.access = cur_access;
359378
}
360379

380+
bool vTensorStorage::is_copy() const {
381+
if (storage_type_ == utils::kBuffer) {
382+
return buffer_.is_copy();
383+
}
384+
return image_.is_copy();
385+
}
386+
361387
bool vTensorStorage::is_copy_of(const vTensorStorage& other) const {
362388
if (storage_type_ == utils::kBuffer) {
363389
return buffer_.is_copy_of(other.buffer_);
@@ -418,7 +444,7 @@ vTensor::vTensor(
418444
}
419445
}
420446

421-
vTensor::vTensor(const vTensor& other)
447+
vTensor::vTensor(vTensor& other)
422448
: dtype_(other.dtype_),
423449
// Copy tensor size metadata
424450
sizes_(other.sizes_.begin(), other.sizes_.end()),
@@ -443,7 +469,7 @@ vTensor::vTensor(const vTensor& other)
443469
storage_(other.storage_) {}
444470

445471
vTensor::vTensor(
446-
const vTensor& other,
472+
vTensor& other,
447473
const std::vector<int64_t>& sizes,
448474
const std::vector<int64_t>& dim_order,
449475
const int64_t offset_numel)
@@ -671,6 +697,14 @@ void vTensor::virtual_reconfigure(
671697
update_metadata();
672698
}
673699

700+
void vTensor::virtual_clone(const vTensor& other) {
701+
VK_CHECK_COND(is_view_of(other));
702+
sizes_ = other.sizes_;
703+
dim_order_ = other.dim_order_;
704+
axis_map_ = other.axis_map_;
705+
packed_dim_ = other.packed_dim_;
706+
}
707+
674708
void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
675709
VK_CHECK_COND(
676710
new_sizes.size() == dim_order_.size(),

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class vTensorStorage final {
104104
* because this behaviour is unsafe, since the original tensor may be
105105
* destroyed before the copy is destroyed.
106106
*/
107-
vTensorStorage(const vTensorStorage& other, const int64_t buffer_offset = 0);
107+
vTensorStorage(vTensorStorage& other, const int64_t buffer_offset = 0);
108108

109109
public:
110110
// To discourage creating copies, the assignment operator is still deleted.
@@ -134,6 +134,8 @@ class vTensorStorage final {
134134

135135
// Last Access - used to insert memory barriers
136136
LastAccess last_access_;
137+
// Indicates whether copies of this vTensorStorage have been made
138+
bool has_copies_;
137139

138140
private:
139141
// Registers underlying memory for cleanup
@@ -153,6 +155,11 @@ class vTensorStorage final {
153155
return image_.format();
154156
}
155157

158+
/*
159+
* Check if the underlying resource is a copy of another resource
160+
*/
161+
bool is_copy() const;
162+
156163
/*
157164
* Used for checking if this vTensorStorage is a copy of another instance
158165
*/
@@ -185,7 +192,7 @@ class vTensor final {
185192
* Once created, the sizes and strides of the aliased vTensor can be changed
186193
* using the `virtual_reconfigure` member function.
187194
*/
188-
vTensor(const vTensor& other);
195+
vTensor(vTensor& other);
189196

190197
/*
191198
* This constructor allows for the creation of a vTensor that references the
@@ -202,7 +209,7 @@ class vTensor final {
202209
* buffer.
203210
*/
204211
vTensor(
205-
const vTensor& other,
212+
vTensor& other,
206213
const std::vector<int64_t>& sizes,
207214
const std::vector<int64_t>& dim_order,
208215
const int64_t offset_numel = 0);
@@ -511,6 +518,11 @@ class vTensor final {
511518
const std::vector<int64_t>& new_sizes,
512519
const std::vector<int64_t>& new_dim_order);
513520

521+
/*
522+
* Set all metadata of this tensor to match the metadata of another tensor.
523+
*/
524+
void virtual_clone(const vTensor& other);
525+
514526
/*
515527
* Perform a virtual resize of the vTensor by modifying the size metadata that
516528
* gets used in compute shaders. This allows the shader to treat the

backends/vulkan/runtime/graph/ops/impl/Transpose.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ void add_transpose_view_node(
6262
const int64_t dim1 = graph.extract_scalar<int64_t>(dim1_ref);
6363

6464
check_transpose_view_args(graph, input_ref, dim0, dim1, out_ref);
65+
const vTensorPtr in = graph.get_tensor(input_ref);
66+
graph.get_tensor(out_ref)->virtual_clone(*in);
6567
graph.get_tensor(out_ref)->virtual_transpose(dim0, dim1);
6668

6769
graph.execute_nodes().emplace_back(new ExecuteNode(

0 commit comments

Comments
 (0)