  */
 
 #include <executorch/backends/vulkan/runtime/api/containers/Tensor.h>
+#include <cstring>
 
 namespace vkcompute {
 namespace api {
@@ -446,11 +447,10 @@ vTensor::vTensor(
       dim_order_(calculate_dim_order(sizes_.size(), packed_dim_)),
       axis_map_(default_axis_map()),
       strides_(calculate_strides(sizes, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_{{0, 0, 0}},
       uniforms_(),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       uniforms_size_(0),
@@ -467,6 +467,11 @@ vTensor::vTensor(
           padded_sizes_,
           dtype_,
           allocate_memory) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");
 
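Note: UniformData itself is declared in Tensor.h and is not shown in this diff. As a rough sketch of what the braced initializer above relies on, inferred from the members referenced later in write_attribute and update_metadata (sizes_v, strides_v, logical_limits, numel) — the field types and the converting constructor below are assumptions, not the actual declaration:

// Hypothetical sketch of the UniformData struct used above; the real
// declaration lives in Tensor.h and may differ.
struct UniformData {
  utils::ivec4 sizes_v;          // sizes, in WHCN order
  utils::ivec4 strides_v;        // unsqueezed strides, in WHCN order
  TextureLimits logical_limits;  // texture extents remapped by axis_map_
  size_t numel;                  // total element count

  // A converting constructor along these lines would let the vTensor
  // constructors brace-initialize from the int64_t metadata vectors.
  UniformData(
      const std::vector<int64_t>& sizes,
      const std::vector<int64_t>& strides,
      const TextureLimits& limits,
      const size_t numel_in)
      : sizes_v(utils::make_whcn_ivec4(sizes)),
        strides_v(utils::make_whcn_ivec4(strides)),
        logical_limits(limits),
        numel(numel_in) {}
};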
@@ -494,11 +499,9 @@ vTensor::vTensor(
       dim_order_(),
       axis_map_(default_axis_map()),
       strides_(),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_(calculate_padded_sizes(sizes_, packed_dim_)),
       unsqueezed_strides_(),
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(),
       uniforms_(),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       uniforms_size_(0),
@@ -508,6 +511,11 @@ vTensor::vTensor(
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Construct Tensor storage
       storage_(context, image) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      {0, 0, 0, 0},
+      {{0, 0, 0}},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
   set_logical_limits(storage_.image_extents_);
 }
 
@@ -519,13 +527,11 @@ vTensor::vTensor(vTensor& other)
       dim_order_(other.dim_order_.begin(), other.dim_order_.end()),
       axis_map_(other.axis_map_.begin(), other.axis_map_.end()),
       strides_(other.strides_.begin(), other.strides_.end()),
-      numel_(other.numel_),
       padded_sizes_{other.padded_sizes_.begin(), other.padded_sizes_.end()},
       unsqueezed_strides_{
           other.unsqueezed_strides_.begin(),
           other.unsqueezed_strides_.end()},
       padded_numel_(other.padded_numel_),
-      logical_limits_{other.logical_limits_},
       uniforms_(),
       // Empty initialize Utility Uniform Buffers
       uniforms_size_(0),
@@ -534,7 +540,9 @@ vTensor::vTensor(vTensor& other)
       numel_uniform_offset_(kUniformOffsetUnset),
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
-      storage_(other.storage_) {}
+      storage_(other.storage_) {
+  uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
+}
 
 vTensor::vTensor(
     vTensor& other,
@@ -548,11 +556,10 @@ vTensor::vTensor(
       dim_order_(dim_order.begin(), dim_order.end()),
       axis_map_(default_axis_map()),
       strides_(calculate_strides(sizes_, dim_order_)),
-      numel_(utils::multiply_integers(sizes_)),
       padded_sizes_{calculate_padded_sizes(sizes, packed_dim_)},
-      unsqueezed_strides_{unsqueeze_strides(strides_, numel_)},
+      unsqueezed_strides_{
+          unsqueeze_strides(strides_, utils::multiply_integers(sizes_))},
       padded_numel_(utils::multiply_integers(padded_sizes_)),
-      logical_limits_(other.logical_limits_),
       uniforms_(),
       // Empty initialize Utility Uniform Buffers
       uniforms_size_(0),
@@ -562,14 +569,45 @@ vTensor::vTensor(
       logical_limits_uniform_offset_(kUniformOffsetUnset),
       // Copy Tensor storage
       storage_(other.storage_, vkapi::element_size(dtype_) * offset_numel) {
+  uniform_data_ = std::make_shared<UniformData>(UniformData{
+      sizes_,
+      unsqueezed_strides_,
+      {other.logical_limits()},
+      static_cast<size_t>(utils::multiply_integers(sizes_))});
+
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "new dim order provided is invalid");
   VK_CHECK_COND(
-      offset_numel + numel_ <= other.numel(),
+      offset_numel + numel() <= other.numel(),
       "Tensor alias cannot access more elements than available in the original"
       " tensor");
 }
 
+uint32_t vTensor::UniformData::write_attribute(
+    void* dst,
+    const uint32_t dst_offset,
+    const uint32_t max_dst_size,
+    const Attribute attr) {
+#define WRITE_ATTRIBUTE_CASE(enum_name, member_name)                        \
+  case vTensor::Attribute::enum_name: {                                     \
+    VK_CHECK_COND(                                                          \
+        (dst_offset + sizeof(member_name)) <= max_dst_size,                 \
+        "Attempting to write tensor attribute outside data boundary.");     \
+    memcpy((uint8_t*)dst + dst_offset, &member_name, sizeof(member_name));  \
+    return sizeof(member_name);                                             \
+  }
+  switch (attr) {
+    WRITE_ATTRIBUTE_CASE(SIZES, sizes_v);
+    WRITE_ATTRIBUTE_CASE(STRIDES, strides_v);
+    WRITE_ATTRIBUTE_CASE(LOGICAL_LIMITS, logical_limits);
+    WRITE_ATTRIBUTE_CASE(NUMEL, numel);
+    default:
+      VK_THROW("Invalid Attribute");
+  }
+#undef WRITE_ATTRIBUTE_CASE
+  return 0;
+}
+
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
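The new UniformData::write_attribute above copies a single metadata field into a caller-supplied buffer at a byte offset, bounds-checks against max_dst_size, and returns the number of bytes written. A usage sketch (the pack_metadata helper and its names are hypothetical, not part of this change): packing selected attributes back-to-back by advancing the offset with each returned size.

// Hypothetical caller, for illustration only: serialize a few attributes
// contiguously into dst, advancing by the byte count each write returns.
uint32_t pack_metadata(vTensor& t, void* dst, const uint32_t dst_size) {
  uint32_t offset = 0;
  offset += t.get_uniform_data()->write_attribute(
      dst, offset, dst_size, vTensor::Attribute::SIZES);
  offset += t.get_uniform_data()->write_attribute(
      dst, offset, dst_size, vTensor::Attribute::NUMEL);
  return offset;  // total bytes written
}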
@@ -601,9 +639,9 @@ vkapi::VulkanBuffer& vTensor::buffer(
 }
 
 void vTensor::set_logical_limits(const utils::uvec3& image_extents) {
-  logical_limits_.limits[0] = image_extents[axis_map_.at(0)];
-  logical_limits_.limits[1] = image_extents[axis_map_.at(1)];
-  logical_limits_.limits[2] = image_extents[axis_map_.at(2)];
+  uniform_data_->logical_limits.limits[0] = image_extents[axis_map_.at(0)];
+  uniform_data_->logical_limits.limits[1] = image_extents[axis_map_.at(1)];
+  uniform_data_->logical_limits.limits[2] = image_extents[axis_map_.at(2)];
 }
 
 utils::GPUMemoryLayout vTensor::estimate_memory_layout() const {
@@ -661,7 +699,7 @@ const vkapi::BufferBindInfo vTensor::logical_limits_ubo() {
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     logical_limits_uniform_offset_ = uniforms_size_;
     uniforms_size_ += kSizePerUniform;
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
   }
   return vkapi::BufferBindInfo(
       uniforms_.buffer(), logical_limits_uniform_offset_);
@@ -677,7 +715,7 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
         "Uniform data allocation has exceeded Tensor uniform buffer size");
     numel_uniform_offset_ = uniforms_size_;
     uniforms_size_ += kSizePerUniform;
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
   }
   return vkapi::BufferBindInfo(uniforms_.buffer(), numel_uniform_offset_);
 }
@@ -687,10 +725,10 @@ size_t vTensor::staging_buffer_numel() const {
   const bool int8_supported =
       storage_.context_->adapter_ptr()->has_full_int8_buffers_support();
   if (is_int8 && !int8_supported) {
-    return utils::align_up_4(numel_);
+    return utils::align_up_4(numel());
   }
   if (storage_type() == utils::kBuffer) {
-    return numel_;
+    return numel();
   }
   return padded_numel_;
 }
@@ -720,30 +758,32 @@ void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
 
 void vTensor::update_metadata() {
   strides_ = calculate_strides(sizes_, dim_order_);
-  numel_ = utils::multiply_integers(sizes_);
+  uniform_data_->numel = utils::multiply_integers(sizes_);
 
   padded_sizes_ = calculate_padded_sizes(sizes_, packed_dim_);
-  unsqueezed_strides_ = unsqueeze_strides(strides_, numel_);
+  unsqueezed_strides_ = unsqueeze_strides(strides_, numel());
   padded_numel_ = utils::multiply_integers(padded_sizes_);
 
+  // Update uniform data if it has been modified
+  uniform_data_->sizes_v = utils::make_whcn_ivec4(sizes_);
+  uniform_data_->strides_v = utils::make_whcn_ivec4(unsqueezed_strides_);
+
   // Calculate the image extents that would have been used to allocate a texture
   // with the current sizes, and use that to set the logical limits.
   set_logical_limits(
       calculate_image_extents(padded_sizes_, axis_map_, packed_dim_));
 
   if (sizes_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(utils::make_whcn_ivec4(sizes_), sizes_uniform_offset_);
+    uniforms_.update(uniform_data_->sizes_v, sizes_uniform_offset_);
   }
   if (unsqueezed_strides_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(
-        utils::make_whcn_ivec4(unsqueezed_strides_),
-        unsqueezed_strides_offset_);
+    uniforms_.update(uniform_data_->strides_v, unsqueezed_strides_offset_);
   }
   if (numel_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(numel_, numel_uniform_offset_);
+    uniforms_.update(numel(), numel_uniform_offset_);
   }
   if (logical_limits_uniform_offset_ != kUniformOffsetUnset) {
-    uniforms_.update(logical_limits_, logical_limits_uniform_offset_);
+    uniforms_.update(logical_limits(), logical_limits_uniform_offset_);
   }
 }
 
@@ -796,6 +836,8 @@ void vTensor::virtual_clone(const vTensor& other) {
   dim_order_ = other.dim_order_;
   axis_map_ = other.axis_map_;
   packed_dim_ = other.packed_dim_;
+
+  *uniform_data_ = *other.get_uniform_data();
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {