pytorch · facebook-github-bot · Sep 6, 2024 · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024
@@ -67,39 +67,31 @@ class TextDecoderRunner {
    * @return The next token.
    */
   inline int32_t logits_to_token(const exec_aten::Tensor& logits_tensor) {
-    switch (logits_tensor.scalar_type()) {
-      // If the logit_tensor rank is 3, the shape is [batch, seq_length,
-      // vocab_size], get the last logits, sample and return. Else the model
-      // outputs the last logit, directly sample and return.
-      case exec_aten::ScalarType::Float: {
-        float* logits = logits_tensor.mutable_data_ptr<float>();
-        if (logits_tensor.dim() == 3) {
-          auto num_tokens = logits_tensor.size(1);
-          auto vocab_size = logits_tensor.size(2);
-          float* logits_last = logits;
-          logits_last += (num_tokens - 1) * vocab_size;
-          return sampler_->sample(logits_last);
-        }
-        return sampler_->sample(logits);
-      }
-      case exec_aten::ScalarType::Half: {
-        exec_aten::Half* logits =
-            logits_tensor.mutable_data_ptr<exec_aten::Half>();
-        if (logits_tensor.dim() == 3) {
-          auto num_tokens = logits_tensor.size(1);
-          auto vocab_size = logits_tensor.size(2);
-          exec_aten::Half* logits_last = logits;
-          logits_last += (num_tokens - 1) * vocab_size;
-          return sampler_->sample(logits_last);
-        }
-        return sampler_->sample(logits);
-      }
-      default:
-        ET_CHECK_MSG(
-            false,
-            "Unsupported dtype output %hhd",
-            static_cast<int8_t>(logits_tensor.scalar_type()));
-    }
+    int32_t result = 0; // Set by the lambda below.
+    ET_SWITCH_THREE_TYPES(
+        Float,
+        Half,
+        BFloat16,
+        logits_tensor.scalar_type(),
+        unused,
+        "logits_to_token",
+        CTYPE,
+        [&]() {
+          // A rank-3 logits_tensor is [batch, seq_length, vocab_size]: sample
+          // the last position's logits, offset from the start of the data.
+          // Otherwise the model emitted only the last logits; sample directly.
+          auto* logits = logits_tensor.mutable_data_ptr<CTYPE>();
+          if (logits_tensor.dim() == 3) {
+            auto num_tokens = logits_tensor.size(1);
+            auto vocab_size = logits_tensor.size(2);
+            auto* logits_last = logits;
+            logits_last += (num_tokens - 1) * vocab_size;
+            result = sampler_->sample(logits_last);
+          } else {
+            result = sampler_->sample(logits);
+          }
+        });
+    return result;
   }
 
  protected:

@@ -192,6 +192,8 @@ int32_t Sampler::sample(T* logits) {
 
 template int32_t Sampler::sample<float>(float* logits);
 template int32_t Sampler::sample<exec_aten::Half>(exec_aten::Half* logits);
+template int32_t Sampler::sample<exec_aten::BFloat16>(
+    exec_aten::BFloat16* logits);
 
 } // namespace llm
 } // namespace extension

@@ -83,7 +83,8 @@ Tensor& opt_add_out(
   ScalarType out_type = out.scalar_type();
 
   if (b.numel() == 1) {
-    if (a_type == b_type && a_type == out_type && a_type != ScalarType::Half) {
+    if (a_type == b_type && a_type == out_type && a_type != ScalarType::Half &&
+        a_type != ScalarType::BFloat16) {
       auto error = resize_tensor(out, a.sizes());
       ET_KERNEL_CHECK_MSG(
           ctx,
@@ -186,12 +187,12 @@ Tensor& opt_add_out(
         InvalidArgument,
         out);
 
-    ET_SWITCH_REALHB_TYPES(a_type, ctx, "add.out", CTYPE_A, [&]() {
-      ET_SWITCH_REALHB_TYPES(b_type, ctx, "add.out", CTYPE_B, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "add.out", CTYPE_A, [&]() {
+      ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, "add.out", CTYPE_B, [&]() {
         using CTYPE_IN = typename torch::executor::
             promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
         ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
-        ET_SWITCH_REALHB_TYPES(out_type, ctx, "add.out", CTYPE_OUT, [&]() {
+        ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, "add.out", CTYPE_OUT, [&]() {
           CTYPE_IN alpha_val;
           ET_KERNEL_CHECK(
               ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, );
@@ -226,7 +227,7 @@ Tensor& opt_add_scalar_out(
 
   ET_CHECK(common_type == out_type);
 
-  if (common_type == ScalarType::Half) {
+  if (common_type == ScalarType::Half || common_type == ScalarType::BFloat16) {
     common_type = ScalarType::Float;
   }
 
@@ -235,7 +236,7 @@ Tensor& opt_add_scalar_out(
   ET_CHECK_MSG(error == Error::Ok, "Failed to resize output tensor.");
 
   if (a_type == common_type && a_type == out_type &&
-      a_type != ScalarType::Half) {
+      a_type != ScalarType::Half && a_type != ScalarType::BFloat16) {
     ET_SWITCH_REALB_TYPES(a_type, ctx, "add.Scalar_out", CTYPE, [&]() {
       ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, "add.Scalar_out", CTYPE_B, [&]() {
         CTYPE_B b_val;
@@ -255,11 +256,11 @@ Tensor& opt_add_scalar_out(
       });
     });
   } else {
-    ET_SWITCH_REALHB_TYPES(a_type, ctx, "add.Scalar_out", CTYPE_A, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "add.Scalar_out", CTYPE_A, [&]() {
       ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, "add.Scalar_out", CTYPE_B, [&]() {
         ET_SWITCH_REALB_TYPES(
             common_type, ctx, "add.Scalar_out", CTYPE_IN, [&]() {
-              ET_SWITCH_REALHB_TYPES(
+              ET_SWITCH_REALHBBF16_TYPES(
                   out_type, ctx, "add.Scalar_out", CTYPE_OUT, [&]() {
                     CTYPE_B b_val;
                     ET_EXTRACT_SCALAR(b, b_val);

@@ -78,7 +78,11 @@ Tensor& add_out(
       InvalidArgument,
       out);
 
-  ET_KERNEL_CHECK(ctx, tensor_is_realhb_type(out), InvalidArgument, out);
+  ET_KERNEL_CHECK(
+      ctx,
+      executorch::runtime::tensor_is_realhbbf16_type(out),
+      InvalidArgument,
+      out);
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(a, b, out), InvalidArgument, out);
 
@@ -94,15 +98,15 @@ Tensor& add_out(
 
   constexpr auto name = "add.out";
 
-  ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
-    ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
       using CTYPE_IN = typename torch::executor::
           promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
       ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
       CTYPE_IN alpha_val;
       utils::extract_scalar(alpha, &alpha_val);
 
-      ET_SWITCH_REALHB_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
+      ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
         AddInner<
             can_cast<CTYPE_IN, CTYPE_OUT>::value,
             CTYPE_A,
@@ -132,7 +136,11 @@ Tensor& add_scalar_out(
       out,
       "Failed to resize output tensor.");
 
-  ET_KERNEL_CHECK(ctx, tensor_is_realhb_type(out), InvalidArgument, out);
+  ET_KERNEL_CHECK(
+      ctx,
+      executorch::runtime::tensor_is_realhbbf16_type(out),
+      InvalidArgument,
+      out);
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(a, out), InvalidArgument, out);
 
@@ -153,7 +161,7 @@ Tensor& add_scalar_out(
 
   constexpr auto name = "add.Scalar_out";
 
-  ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
     ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
       using CTYPE_IN = typename utils::promote_type_with_scalar_type<
           CTYPE_A,

@@ -45,8 +45,8 @@ Tensor& copy_out(
   ScalarType in_type = in.scalar_type();
   ScalarType src_type = src.scalar_type();
 
-  ET_SWITCH_REALHB_TYPES(in_type, ctx, "copy.out", CTYPE, [&]() {
-    ET_SWITCH_REALHB_TYPES(src_type, ctx, "copy.out", CTYPE_SRC, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, "copy.out", CTYPE, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(src_type, ctx, "copy.out", CTYPE_SRC, [&]() {
       apply_binary_elementwise_fn<CTYPE, CTYPE_SRC, CTYPE>(
           [](const CTYPE val_in, const CTYPE_SRC val_src) {
             return convert<CTYPE, CTYPE_SRC>(val_src);
@@ -75,8 +75,8 @@ copy_(RuntimeContext& ctx, Tensor& in, const Tensor& src, bool non_blocking) {
   ScalarType in_type = in.scalar_type();
   ScalarType src_type = src.scalar_type();
 
-  ET_SWITCH_REALHB_TYPES(in_type, ctx, "copy_", CTYPE, [&]() {
-    ET_SWITCH_REALHB_TYPES(src_type, ctx, "copy_", CTYPE_SRC, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, "copy_", CTYPE, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(src_type, ctx, "copy_", CTYPE_SRC, [&]() {
       apply_binary_elementwise_fn<CTYPE, CTYPE_SRC, CTYPE>(
           [](const CTYPE val_in, const CTYPE_SRC val_src) {
             return convert<CTYPE, CTYPE_SRC>(val_src);

@@ -34,19 +34,20 @@ mm_out(RuntimeContext& ctx, const Tensor& in, const Tensor& mat2, Tensor& out) {
 
   ET_KERNEL_CHECK(ctx, tensor_is_default_dim_order(in), InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES_AND(Half, in.scalar_type(), ctx, "mm.out", CTYPE, [&]() {
-    size_t m = in.size(0);
-    size_t n = in.size(1);
-    size_t p = mat2.size(1);
-
-    vec_matmul<CTYPE>(
-        out.mutable_data_ptr<CTYPE>(),
-        in.const_data_ptr<CTYPE>(),
-        mat2.const_data_ptr<CTYPE>(),
-        m,
-        n,
-        p);
-  });
+  ET_SWITCH_REAL_TYPES_AND2(
+      Half, BFloat16, in.scalar_type(), ctx, "mm.out", CTYPE, [&]() {
+        size_t m = in.size(0);
+        size_t n = in.size(1);
+        size_t p = mat2.size(1);
+
+        vec_matmul<CTYPE>(
+            out.mutable_data_ptr<CTYPE>(),
+            in.const_data_ptr<CTYPE>(),
+            mat2.const_data_ptr<CTYPE>(),
+            m,
+            n,
+            p);
+      });
 
   return out;
 }

@@ -24,13 +24,14 @@ Tensor& scalar_tensor_out(RuntimeContext& ctx, const Scalar& s, Tensor& out) {
 
   constexpr auto name = "scalar_tensor.out";
 
-  ET_SWITCH_REALHB_TYPES(out_type, ctx, name, CTYPE, [&]() {
-    ET_SWITCH_SCALAR_OBJ_TYPES(s_type, ctx, name, CTYPE_S, [&]() {
-      CTYPE_S val_s;
-      utils::extract_scalar(s, &val_s);
-      out.mutable_data_ptr<CTYPE>()[0] = convert<CTYPE, CTYPE_S>(val_s);
-    });
-  });
+  ET_SWITCH_REAL_TYPES_AND3(
+      Half, Bool, BFloat16, out_type, ctx, name, CTYPE, [&]() {
+        ET_SWITCH_SCALAR_OBJ_TYPES(s_type, ctx, name, CTYPE_S, [&]() {
+          CTYPE_S val_s;
+          utils::extract_scalar(s, &val_s);
+          out.mutable_data_ptr<CTYPE>()[0] = convert<CTYPE, CTYPE_S>(val_s);
+        });
+      });
 
   return out;
 }

@@ -74,8 +74,8 @@ Tensor& slice_scatter_out(
   ScalarType in_type = input.scalar_type();
   ScalarType src_type = src.scalar_type();
 
-  ET_SWITCH_REALHB_TYPES(in_type, ctx, "slice_scatter.out", CTYPE, [&]() {
-    ET_SWITCH_REALHB_TYPES(
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, "slice_scatter.out", CTYPE, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(
         src_type, ctx, "slice_scatter.out", CTYPE_SRC, [&]() {
           CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
           const CTYPE_SRC* src_data = src.const_data_ptr<CTYPE_SRC>();

@@ -41,8 +41,8 @@ Tensor& where_out(
       cond_type == ScalarType::Bool || cond_type == ScalarType::Byte,
       "Unhandled dtype %s for where.self_out",
       torch::executor::toString(cond_type));
-  ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
-    ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
+    ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
       using CTYPE_OUT =
           typename torch::executor::promote_types<CTYPE_A, CTYPE_B>::type;
       apply_ternary_elementwise_fn<CTYPE_A, CTYPE_B, uint8_t, CTYPE_OUT>(

@@ -58,6 +58,7 @@ class OpAddOutKernelTest : public OperatorTest {
 
   template <ScalarType DTYPE_A, ScalarType DTYPE_B>
   void test_add_enumerate_out_types() {
+    test_add<DTYPE_A, DTYPE_B, ScalarType::BFloat16>();
     test_add<DTYPE_A, DTYPE_B, ScalarType::Half>();
     test_add<DTYPE_A, DTYPE_B, ScalarType::Float>();
     test_add<DTYPE_A, DTYPE_B, ScalarType::Double>();
@@ -73,7 +74,7 @@ class OpAddOutKernelTest : public OperatorTest {
 #define ENUMERATE_TEST_ENTRY(ctype, dtype) \
   test_add_enumerate_out_types<DTYPE_A, ScalarType::dtype>();
 
-    ET_FORALL_REAL_TYPES_AND(Half, ENUMERATE_TEST_ENTRY)
+    ET_FORALL_REALHBF16_TYPES(ENUMERATE_TEST_ENTRY)
 
 #undef ENUMERATE_TEST_ENTRY
   }
@@ -82,7 +83,7 @@ class OpAddOutKernelTest : public OperatorTest {
 #define ENUMERATE_TEST_ENTRY(ctype, dtype) \
   test_add_enumerate_b_types<ScalarType::dtype>();
 
-    ET_FORALL_REAL_TYPES_AND(Half, ENUMERATE_TEST_ENTRY)
+    ET_FORALL_REALHBF16_TYPES(ENUMERATE_TEST_ENTRY)
 
 #undef ENUMERATE_TEST_ENTRY
   }
@@ -99,13 +100,15 @@ class OpAddOutKernelTest : public OperatorTest {
 
     // Add two tensors.
     op_add_out(
-        tf.make(sizes, /*data=*/{1.1, 2.2, 4.4, 8.8}),
+        tf.make(sizes, /*data=*/{1.25, 2.25, 4.5, 8.875}),
         tf.ones(sizes),
-        /*alpha=*/1.1,
+        /*alpha=*/1.25,
         out);
 
-    // Check that it matches the expected output.
-    EXPECT_TENSOR_CLOSE(out, tf.make(sizes, /*data=*/{2.2, 3.3, 5.5, 9.9}));
+    // Check that it matches the expected output. Values selected to
+    // be exactly representable to avoid throwing off half/bfloat16
+    // tests.
+    EXPECT_TENSOR_CLOSE(out, tf.make(sizes, /*data=*/{2.5, 3.5, 5.75, 10.125}));
   }
 };
 
@@ -136,6 +139,14 @@ TEST_F(OpAddOutKernelTest, DoubleTensors) {
   test_floating_point_add_out<ScalarType::Double>();
 }
 
+TEST_F(OpAddOutKernelTest, HalfTensors) {
+  test_floating_point_add_out<ScalarType::Half>(); // Shared with DoubleTensors.
+}
+
+TEST_F(OpAddOutKernelTest, BFloat16Tensors) {
+  test_floating_point_add_out<ScalarType::BFloat16>(); // Shared with DoubleTensors.
+}
+
 TEST_F(OpAddOutKernelTest, BoolAndIntInputTensor) {
   TensorFactory<ScalarType::Bool> tf;
   TensorFactory<ScalarType::Int> tfi;

@@ -125,13 +125,13 @@ class OpCopyInplaceTest : public OperatorTest {
 // Regular test for copy.out: runs test_dtype once per enumerated dtype.
 TEST_F(OpCopyTest, AllRealDtypesSupported) {
 #define TEST_ENTRY(ctype, dtype) test_dtype<ctype, ScalarType::dtype>();
-  ET_FORALL_REAL_TYPES(TEST_ENTRY);
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
 #undef TEST_ENTRY
 }
 
 TEST_F(OpCopyTest, EmptyInputSupported) {
 #define TEST_ENTRY(ctype, dtype) test_empty_input<ctype, ScalarType::dtype>();
-  ET_FORALL_REAL_TYPES_AND(Bool, TEST_ENTRY);
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY); // NOTE(review): Bool dropped? confirm
 #undef TEST_ENTRY
 }
 

@@ -81,7 +81,7 @@ TEST_F(OpMmOutTest, OutputDim) {
 /// zeros().
 TEST_F(OpMmOutTest, AllDtypesSupported) {
 #define TEST_ENTRY(ctype, dtype) test_dtype<ctype, ScalarType::dtype>();
-  ET_FORALL_REAL_TYPES_AND(Half, TEST_ENTRY);
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
 #undef TEST_ENTRY
   // TODO: Also add tests for half, complex, quantized, and other types. Easiest
   // way to do that would be to make TensorFactory support zeros() and ones()

@@ -80,7 +80,7 @@ class OpScalarTensorOutTest : public OperatorTest {
     test_scalar_tensor_out_0d<ctype, ScalarType::dtype>(9); \
   }
 
-ET_FORALL_REAL_TYPES(GENERATE_TEST_0D)
+ET_FORALL_REAL_TYPES_AND3(Half, Bool, BFloat16, GENERATE_TEST_0D)
 
 #define GENERATE_TEST(ctype, dtype)                                    \
   TEST_F(OpScalarTensorOutTest, dtype##Tensors) {                      \
@@ -98,7 +98,7 @@ ET_FORALL_REAL_TYPES(GENERATE_TEST_0D)
     test_scalar_tensor_out_3d<ctype, ScalarType::dtype>(7);            \
   }
 
-ET_FORALL_REAL_TYPES(GENERATE_TEST)
+ET_FORALL_REAL_TYPES_AND3(Half, Bool, BFloat16, GENERATE_TEST)
 
 TEST_F(OpScalarTensorOutTest, InvalidOutShapeFails) {
   if (torch::executor::testing::SupportedFeatures::get()->is_aten) {