Skip to content

Commit 02630bc

Browse files
committed
[ET-VK][15/n] reconcile Dim4D and NchwDim
TSIA. Differential Revision: [D56731155](https://our.internmc.facebook.com/intern/diff/D56731155/) [ghstack-poisoned]
1 parent 8178226 commit 02630bc

File tree

7 files changed

+74
-93
lines changed

7 files changed

+74
-93
lines changed

backends/vulkan/runtime/graph/ops/impl/Cat.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ void add_cat_default_node(
3131
int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
3232
vTensorPtr t_out = graph.get_tensor(out);
3333

34-
NchwDim nchw_dim = normalize_to_nchw_dim(*t_out, dim);
34+
Dim4DType dim4d = normalize_to_dim4d(*t_out, dim);
3535

3636
// TODO: Find ways to factor out the similar code for width, height, and batch
37-
if (nchw_dim == DimWidth) {
37+
if (dim4d == DIM4D_WIDTH) {
3838
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
3939
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
4040

@@ -46,7 +46,7 @@ void add_cat_default_node(
4646
dst_offset.data[0] += range.data[0];
4747
}
4848

49-
} else if (nchw_dim == DimHeight) {
49+
} else if (dim4d == DIM4D_HEIGHT) {
5050
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
5151
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
5252

@@ -57,7 +57,7 @@ void add_cat_default_node(
5757
graph, input_ref, range, src_offset, dst_offset, out);
5858
dst_offset.data[1] += range.data[1];
5959
}
60-
} else if (nchw_dim == DimBatch) {
60+
} else if (dim4d == DIM4D_BATCH) {
6161
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
6262
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
6363

@@ -68,19 +68,19 @@ void add_cat_default_node(
6868
graph, input_ref, range, src_offset, dst_offset, out);
6969
dst_offset.data[2] += range.data[2];
7070
}
71-
} else if (nchw_dim == DimChannel) {
71+
} else if (dim4d == DIM4D_CHANNEL) {
7272
int32_t src_offset = 0;
7373
int32_t dst_offset = 0;
7474

7575
for (ValueRef input_ref : *input_list) {
7676
vTensorPtr t_in = graph.get_tensor(input_ref);
77-
int32_t range = dim_at<Dim4D::Channel>(t_in->sizes());
77+
int32_t range = dim_at(t_in->sizes(), DIM4D_CHANNEL);
7878
add_copy_channel_offset_node(
7979
graph, input_ref, range, src_offset, dst_offset, out);
8080
dst_offset += range;
8181
}
8282
} else {
83-
VK_THROW("Unexpected value of nchw_dim=", nchw_dim);
83+
VK_THROW("Unexpected value of dim4d=", dim4d);
8484
}
8585
}
8686

backends/vulkan/runtime/graph/ops/impl/Copy.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,23 +92,23 @@ void add_copy_channel_offset_node(
9292
VK_CHECK_COND(t_out->dim() >= 3, "Dst dim should be at least 3");
9393

9494
VK_CHECK_COND(
95-
dim_at<Dim4D::Channel>(in_sizes) >= src_channel_offset + channel_range,
95+
dim_at<DIM4D_CHANNEL>(in_sizes) >= src_channel_offset + channel_range,
9696
"Src channel (",
9797
src_channel_offset,
9898
") and range (",
9999
channel_range,
100100
") should be less than or equal to input tensor's channel size (",
101-
dim_at<Dim4D::Channel>(in_sizes),
101+
dim_at<DIM4D_CHANNEL>(in_sizes),
102102
")");
103103

104104
VK_CHECK_COND(
105-
dim_at<Dim4D::Channel>(out_sizes) >= dst_channel_offset + channel_range,
105+
dim_at<DIM4D_CHANNEL>(out_sizes) >= dst_channel_offset + channel_range,
106106
"Dst channel (",
107107
dst_channel_offset,
108108
") and range (",
109109
channel_range,
110110
") should be less than or equal to output tensor's channel size (",
111-
dim_at<Dim4D::Channel>(out_sizes),
111+
dim_at<DIM4D_CHANNEL>(out_sizes),
112112
")");
113113

114114
VK_CHECK_COND(channel_range >= 0, "Channel range must be non-negative");
@@ -121,10 +121,10 @@ void add_copy_channel_offset_node(
121121
kernel_name.reserve(kShaderNameReserve);
122122
add_dtype_suffix(kernel_name, *t_out);
123123

124-
int32_t out_channels = dim_at<Dim4D::Channel>(out_sizes);
124+
int32_t out_channels = dim_at<DIM4D_CHANNEL>(out_sizes);
125125

126126
// Copy one batch at a time.
127-
for (int batch_idx = 0; batch_idx < dim_at<Dim4D::Batch>(in_sizes);
127+
for (int batch_idx = 0; batch_idx < dim_at<DIM4D_BATCH>(in_sizes);
128128
batch_idx++) {
129129
// Mapping the tensor NCHW coordinates into texture XYZ coordinates
130130
int32_t dst_first_z = dst_channel_offset / 4;
@@ -139,8 +139,8 @@ void add_copy_channel_offset_node(
139139
0, 0, dst_first_z + batch_idx * api::utils::div_up(out_channels, 4)};
140140

141141
uvec3 global_size{
142-
dim_at<Dim4D::Width>(in_sizes),
143-
dim_at<Dim4D::Height>(in_sizes),
142+
dim_at<DIM4D_WIDTH>(in_sizes),
143+
dim_at<DIM4D_HEIGHT>(in_sizes),
144144
api::utils::safe_downcast<uint32_t>(dst_last_z - dst_first_z + 1)};
145145

146146
uvec3 local_size = adaptive_work_group_size(global_size);

backends/vulkan/runtime/graph/ops/impl/Permute.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ void add_permute_node(
7070
kernel_name.reserve(kShaderNameReserve);
7171
add_dtype_suffix(kernel_name, *t_out);
7272

73-
uint32_t out_channels = dim_at<Dim4D::Channel>(t_out->sizes());
74-
uint32_t in_channels = dim_at<Dim4D::Channel>(t_in->sizes());
73+
uint32_t out_channels = dim_at<DIM4D_CHANNEL>(t_out->sizes());
74+
uint32_t in_channels = dim_at<DIM4D_CHANNEL>(t_in->sizes());
7575

7676
uint32_t out_c_aligned = api::utils::align_up(out_channels, 4u);
7777
uint32_t in_c_aligned = api::utils::align_up(in_channels, 4u);

backends/vulkan/runtime/graph/ops/impl/Repeat.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,23 @@ void check_args(
3232
"Input tensor dim size must be not greater than the repeat argument's size");
3333

3434
VK_CHECK_COND(
35-
dim_at<Dim4D::Width>(in.sizes()) * dim_at<Dim4D::Width>(repeats) ==
36-
dim_at<Dim4D::Width>(out.sizes()),
35+
dim_at<DIM4D_WIDTH>(in.sizes()) * dim_at<DIM4D_WIDTH>(repeats) ==
36+
dim_at<DIM4D_WIDTH>(out.sizes()),
3737
"Output's width doesn't match input's width * repeat count");
3838

3939
VK_CHECK_COND(
40-
dim_at<Dim4D::Height>(in.sizes()) * dim_at<Dim4D::Height>(repeats) ==
41-
dim_at<Dim4D::Height>(out.sizes()),
40+
dim_at<DIM4D_HEIGHT>(in.sizes()) * dim_at<DIM4D_HEIGHT>(repeats) ==
41+
dim_at<DIM4D_HEIGHT>(out.sizes()),
4242
"Output's height doesn't match input's height * repeat count");
4343

4444
VK_CHECK_COND(
45-
dim_at<Dim4D::Channel>(in.sizes()) * dim_at<Dim4D::Channel>(repeats) ==
46-
dim_at<Dim4D::Channel>(out.sizes()),
45+
dim_at<DIM4D_CHANNEL>(in.sizes()) * dim_at<DIM4D_CHANNEL>(repeats) ==
46+
dim_at<DIM4D_CHANNEL>(out.sizes()),
4747
"Output's channel doesn't match input's channel * repeat count");
4848

4949
VK_CHECK_COND(
50-
dim_at<Dim4D::Batch>(in.sizes()) * dim_at<Dim4D::Batch>(repeats) ==
51-
dim_at<Dim4D::Batch>(out.sizes()),
50+
dim_at<DIM4D_BATCH>(in.sizes()) * dim_at<DIM4D_BATCH>(repeats) ==
51+
dim_at<DIM4D_BATCH>(out.sizes()),
5252
"Output's batch doesn't match input's batch * repeat count");
5353
}
5454

@@ -70,13 +70,13 @@ void add_repeat_channel_node(
7070
const std::vector<int64_t>& in_sizes = t_in->sizes();
7171

7272
int32_t in_width =
73-
api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Width>(in_sizes));
73+
api::utils::safe_downcast<int32_t>(dim_at<DIM4D_WIDTH>(in_sizes));
7474
int32_t in_height =
75-
api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Height>(in_sizes));
75+
api::utils::safe_downcast<int32_t>(dim_at<DIM4D_HEIGHT>(in_sizes));
7676
int32_t in_channel =
77-
api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Channel>(in_sizes));
77+
api::utils::safe_downcast<int32_t>(dim_at<DIM4D_CHANNEL>(in_sizes));
7878
int32_t in_batch =
79-
api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Batch>(in_sizes));
79+
api::utils::safe_downcast<int32_t>(dim_at<DIM4D_BATCH>(in_sizes));
8080

8181
int32_t out_channel = repeat_channel * in_channel;
8282

@@ -142,7 +142,7 @@ void add_repeat_node(
142142
// dimension, we copy over the input texture to the output. In subsequent
143143
// dimensions, we read and write from the same tensor.
144144

145-
if (int64_t channel_repeat = dim_at<Dim4D::Channel>(repeats);
145+
if (int64_t channel_repeat = dim_at<DIM4D_CHANNEL>(repeats);
146146
channel_repeat == 1) {
147147
// If no repeat, short-cut to a direct copy
148148
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -156,12 +156,12 @@ void add_repeat_node(
156156

157157
// TODO: refactor width, height, and batch into a common helper function.
158158
// Width
159-
if (int64_t width_repeat = dim_at<Dim4D::Width>(repeats); width_repeat > 1) {
159+
if (int64_t width_repeat = dim_at<DIM4D_WIDTH>(repeats); width_repeat > 1) {
160160
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
161161

162162
for (int i = 1; i < width_repeat; ++i) {
163163
api::utils::ivec3 dst_offset = api::utils::make_ivec3(
164-
{i * dim_at<Dim4D::Width>(in_sizes), 0, 0}, false);
164+
{i * dim_at<DIM4D_WIDTH>(in_sizes), 0, 0}, false);
165165

166166
add_copy_offset_node(
167167
graph, out, running_range, src_offset, dst_offset, out);
@@ -171,13 +171,13 @@ void add_repeat_node(
171171
}
172172

173173
// Height
174-
if (int64_t height_repeat = dim_at<Dim4D::Height>(repeats);
174+
if (int64_t height_repeat = dim_at<DIM4D_HEIGHT>(repeats);
175175
height_repeat > 1) {
176176
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
177177

178178
for (int i = 1; i < height_repeat; ++i) {
179179
api::utils::ivec3 dst_offset = api::utils::make_ivec3(
180-
{0, i * dim_at<Dim4D::Height>(in_sizes), 0}, false);
180+
{0, i * dim_at<DIM4D_HEIGHT>(in_sizes), 0}, false);
181181

182182
add_copy_offset_node(
183183
graph, out, running_range, src_offset, dst_offset, out);
@@ -187,7 +187,7 @@ void add_repeat_node(
187187
}
188188

189189
// Batch
190-
if (int64_t batch_repeat = dim_at<Dim4D::Batch>(repeats); batch_repeat > 1) {
190+
if (int64_t batch_repeat = dim_at<DIM4D_BATCH>(repeats); batch_repeat > 1) {
191191
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
192192

193193
for (int i = 1; i < batch_repeat; ++i) {

backends/vulkan/runtime/graph/ops/impl/Slice.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ void add_slice_tensor_out_node(
4343

4444
dim = normalize(dim, t_in->dim());
4545

46-
// Create a dim value as in the underlying dim is 4-dimension.
47-
int64_t nchw_dim = dim + (4 - t_in->dim());
46+
Dim4DType dim4d = normalize_to_dim4d(*t_in, dim);
4847

4948
std::optional<int64_t> opt_start =
5049
graph.extract_optional_scalar<int64_t>(opt_start_ref);
@@ -61,7 +60,7 @@ void add_slice_tensor_out_node(
6160
VK_CHECK_COND((0 <= start) && (start < in_sizes[dim]));
6261
VK_CHECK_COND((0 <= end) && (end <= in_sizes[dim]));
6362

64-
if (nchw_dim == 1) {
63+
if (dim4d == DIM4D_CHANNEL) {
6564
// slice by channel
6665
std::string kernel_name = "slice_channel";
6766
kernel_name.reserve(kShaderNameReserve);
@@ -93,17 +92,17 @@ void add_slice_tensor_out_node(
9392
// GPU's coordinate is in x, y, z
9493
int64_t gpu_dim = -1;
9594
int64_t stride = 1;
96-
if (nchw_dim == 3) {
95+
if (dim4d == DIM4D_WIDTH) {
9796
gpu_dim = 0; // width: x dimension in gpu
9897
VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step));
99-
} else if (nchw_dim == 2) {
98+
} else if (dim4d == DIM4D_HEIGHT) {
10099
gpu_dim = 1; // height: y dimension
101100
VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step));
102-
} else if (nchw_dim == 0) {
101+
} else if (dim4d == DIM4D_BATCH) {
103102
gpu_dim = 2; // batch: z dimension
104103

105104
// Due to channel packing, each batch value is span over stride planes
106-
int64_t n_channels = dim_at<Dim4D::Channel>(in_sizes);
105+
int64_t n_channels = dim_at(in_sizes, DIM4D_CHANNEL);
107106
stride = api::utils::div_up<int64_t>(n_channels, 4ll);
108107
} else {
109108
VK_THROW("Unexpected dim4d!");

backends/vulkan/runtime/graph/ops/impl/Split.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ void add_split_with_sizes_default_node(
2929

3030
ValueListPtr out_list = graph.get_value_list(out_list_ref);
3131

32-
NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
32+
Dim4DType dim4d = normalize_to_dim4d(*t_in, dim);
3333

3434
VK_CHECK_COND(out_list->size() == split_sizes.size());
3535

@@ -39,10 +39,10 @@ void add_split_with_sizes_default_node(
3939

4040
vTensorPtr t_out = graph.get_tensor(out_ref);
4141
VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
42-
VK_CHECK_COND(dim_at(*t_out, nchw_dim) == split_size);
42+
VK_CHECK_COND(dim_at(*t_out, dim4d) == split_size);
4343
}
4444

45-
if (nchw_dim == DimWidth) {
45+
if (dim4d == DIM4D_WIDTH) {
4646
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
4747
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
4848

@@ -55,7 +55,7 @@ void add_split_with_sizes_default_node(
5555

5656
src_offset.data[0] += range.data[0];
5757
}
58-
} else if (nchw_dim == DimHeight) {
58+
} else if (dim4d == DIM4D_HEIGHT) {
5959
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
6060
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
6161

@@ -66,7 +66,7 @@ void add_split_with_sizes_default_node(
6666

6767
src_offset.data[1] += range.data[1];
6868
}
69-
} else if (nchw_dim == DimBatch) {
69+
} else if (dim4d == DIM4D_BATCH) {
7070
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
7171
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
7272

@@ -77,13 +77,13 @@ void add_split_with_sizes_default_node(
7777

7878
src_offset.data[2] += range.data[2];
7979
}
80-
} else if (nchw_dim == DimChannel) {
80+
} else if (dim4d == DIM4D_CHANNEL) {
8181
int32_t src_offset = 0;
8282
int32_t dst_offset = 0;
8383

8484
for (ValueRef out_ref : *out_list) {
8585
vTensorPtr t_out = graph.get_tensor(out_ref);
86-
int32_t range = dim_at<Dim4D::Channel>(t_out->sizes());
86+
int32_t range = dim_at<DIM4D_CHANNEL>(t_out->sizes());
8787
add_copy_channel_offset_node(
8888
graph, in, range, src_offset, dst_offset, out_ref);
8989
src_offset += range;
@@ -122,8 +122,8 @@ void add_split_tensor_node(
122122
int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
123123

124124
vTensorPtr t_in = graph.get_tensor(in);
125-
NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
126-
int64_t size = dim_at(*t_in, nchw_dim);
125+
Dim4DType dim4d = normalize_to_dim4d(*t_in, dim);
126+
int64_t size = dim_at(*t_in, dim4d);
127127
std::vector<int64_t> split_sizes(size / split_size, split_size);
128128

129129
add_split_with_sizes_default_node(graph, in, split_sizes, dim, out);

0 commit comments

Comments
 (0)