IntelPython
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp
Lines changed: 8 additions & 4 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp
Lines changed: 6 additions & 2 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp
Lines changed: 6 additions & 3 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp
Lines changed: 8 additions & 4 deletions
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp
Lines changed: 5 additions & 2 deletions
@@ -32,9 +32,11 @@
 #include <type_traits>
 
 #include "cabs_impl.hpp"
-#include "kernels/elementwise_functions/common.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -50,6 +52,7 @@ namespace abs
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AbsFunctor
@@ -89,8 +92,8 @@ template <typename argT, typename resT> struct AbsFunctor
 
 template <typename argT,
           typename resT = argT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AbsContigFunctor =
     elementwise_common::UnaryContigFunctor<argT,
 
@@ -29,10 +29,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -48,6 +50,7 @@ namespace acos
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AcosFunctor
@@ -128,8 +131,8 @@ template <typename argT, typename resT> struct AcosFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AcosContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -29,10 +29,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -48,6 +50,7 @@ namespace acosh
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AcoshFunctor
@@ -155,8 +158,8 @@ template <typename argT, typename resT> struct AcoshFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AcoshContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -30,6 +30,8 @@
 #include <type_traits>
 
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -50,6 +52,8 @@ namespace add
 namespace td_ns = dpctl::tensor::type_dispatch;
 namespace tu_ns = dpctl::tensor::type_utils;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
+
 template <typename argT1, typename argT2, typename resT> struct AddFunctor
 {
 
@@ -110,8 +114,8 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor
 template <typename argT1,
           typename argT2,
           typename resT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT1, argT2, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AddContigFunctor =
     elementwise_common::BinaryContigFunctor<argT1,
@@ -410,8 +414,8 @@ template <typename argT, typename resT> struct AddInplaceFunctor
 
 template <typename argT,
           typename resT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AddInplaceContigFunctor = elementwise_common::BinaryInplaceContigFunctor<
     argT,
 
@@ -30,10 +30,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -49,6 +51,7 @@ namespace angle
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AngleFunctor
@@ -74,8 +77,8 @@ template <typename argT, typename resT> struct AngleFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AngleContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -29,10 +29,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -48,6 +50,7 @@ namespace asin
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AsinFunctor
@@ -148,8 +151,8 @@ template <typename argT, typename resT> struct AsinFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AsinContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -29,10 +29,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -48,6 +50,7 @@ namespace asinh
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AsinhFunctor
@@ -131,8 +134,8 @@ template <typename argT, typename resT> struct AsinhFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AsinhContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -30,10 +30,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -49,6 +51,7 @@ namespace atan
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AtanFunctor
@@ -138,8 +141,8 @@ template <typename argT, typename resT> struct AtanFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AtanContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -29,6 +29,8 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
+#include "vec_size_util.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -48,6 +50,8 @@ namespace atan2
 namespace td_ns = dpctl::tensor::type_dispatch;
 namespace tu_ns = dpctl::tensor::type_utils;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
+
 template <typename argT1, typename argT2, typename resT> struct Atan2Functor
 {
 
@@ -68,8 +72,8 @@ template <typename argT1, typename argT2, typename resT> struct Atan2Functor
 template <typename argT1,
           typename argT2,
           typename resT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT1, argT2, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using Atan2ContigFunctor =
     elementwise_common::BinaryContigFunctor<argT1,
 
@@ -30,10 +30,12 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
-#include "kernels/elementwise_functions/common.hpp"
 #include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
 
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -49,6 +51,7 @@ namespace atanh
 
 namespace td_ns = dpctl::tensor::type_dispatch;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::is_complex;
 
 template <typename argT, typename resT> struct AtanhFunctor
@@ -132,8 +135,8 @@ template <typename argT, typename resT> struct AtanhFunctor
 
 template <typename argTy,
           typename resTy = argTy,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argTy, resTy>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using AtanhContigFunctor =
     elementwise_common::UnaryContigFunctor<argTy,
 
@@ -28,6 +28,8 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
+#include "vec_size_util.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -48,6 +50,8 @@ namespace bitwise_and
 namespace td_ns = dpctl::tensor::type_dispatch;
 namespace tu_ns = dpctl::tensor::type_utils;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
+
 template <typename argT1, typename argT2, typename resT>
 struct BitwiseAndFunctor
 {
@@ -91,8 +95,8 @@ struct BitwiseAndFunctor
 template <typename argT1,
           typename argT2,
           typename resT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT1, argT2, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using BitwiseAndContigFunctor = elementwise_common::BinaryContigFunctor<
     argT1,
@@ -290,8 +294,8 @@ template <typename argT, typename resT> struct BitwiseAndInplaceFunctor
 
 template <typename argT,
           typename resT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using BitwiseAndInplaceContigFunctor =
     elementwise_common::BinaryInplaceContigFunctor<
 
@@ -30,6 +30,8 @@
 #include <sycl/sycl.hpp>
 #include <type_traits>
 
+#include "vec_size_util.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
@@ -49,6 +51,7 @@ namespace bitwise_invert
 namespace td_ns = dpctl::tensor::type_dispatch;
 namespace tu_ns = dpctl::tensor::type_utils;
 
+using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
 using dpctl::tensor::type_utils::vec_cast;
 
 template <typename argT, typename resT> struct BitwiseInvertFunctor
@@ -80,8 +83,8 @@ template <typename argT, typename resT> struct BitwiseInvertFunctor
 
 template <typename argT,
           typename resT = argT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
+          unsigned int vec_sz = VecSize_v<argT, resT>,
+          unsigned int n_vecs = 1,
           bool enable_sg_loadstore = true>
 using BitwiseInvertContigFunctor =
     elementwise_common::UnaryContigFunctor<argT,