pytorch
diff --git a/.lintrunner.toml b/.lintrunner.toml
Lines changed: 4 additions & 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 3 additions & 1 deletion
diff --git a/runtime/core/exec_aten/exec_aten.h b/runtime/core/exec_aten/exec_aten.h
Lines changed: 0 additions & 1 deletion
diff --git a/runtime/core/portable_type/bfloat16.h b/runtime/core/portable_type/bfloat16.h
Lines changed: 6 additions & 322 deletions
diff --git a/runtime/core/portable_type/c10/TARGETS b/runtime/core/portable_type/c10/TARGETS
Lines changed: 8 additions & 0 deletions
@@ -77,6 +77,8 @@ exclude_patterns = [
     # File contains @generated
     'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
     'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
+    # Exclude c10 from this linter so the copy can be kept in sync with PyTorch core.
+    'runtime/core/portable_type/c10/**',
 ]
 command = [
     'python',
@@ -260,6 +262,8 @@ exclude_patterns = [
     'extension/**',
     'kernels/optimized/**',
     'runtime/core/exec_aten/**',
+    # Exclude c10 from this linter so the copy can be kept in sync with PyTorch core.
+    'runtime/core/portable_type/c10/**',
     'runtime/executor/tensor_parser_aten.cpp',
     'scripts/**',
     'test/**',
 
@@ -337,7 +337,9 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch")
       "fix for this restriction."
   )
 endif()
-set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)
+set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type)
+# We don't need any of C10's CMake macros.
+add_definitions(-DC10_USING_CUSTOM_GENERATED_MACROS)
 
 #
 # The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
 
@@ -33,7 +33,6 @@
 #else // use executor
 #include <executorch/runtime/core/array_ref.h> // @manual
 #include <executorch/runtime/core/portable_type/bfloat16.h> // @manual
-#include <executorch/runtime/core/portable_type/bfloat16_math.h> // @manual
 #include <executorch/runtime/core/portable_type/complex.h> // @manual
 #include <executorch/runtime/core/portable_type/device.h> // @manual
 #include <executorch/runtime/core/portable_type/half.h> // @manual
 
@@ -8,260 +8,15 @@
 
 #pragma once
 
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <ostream>
-
-namespace executorch {
-namespace runtime {
-namespace etensor {
+#include <c10/util/BFloat16.h>
 
+namespace executorch::runtime::etensor {
+using c10::BFloat16;
 namespace internal {
-inline float f32_from_bits(uint16_t src) {
-  float res = 0;
-  uint32_t tmp = src;
-  tmp <<= 16;
-  std::memcpy(&res, &tmp, sizeof(tmp));
-  return res;
-}
-
-inline uint16_t round_to_nearest_even(float src) {
-  if (std::isnan(src)) {
-    return UINT16_C(0x7FC0);
-  }
-  uint32_t U32 = 0;
-  std::memcpy(&U32, &src, sizeof(U32));
-  uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF);
-  return static_cast<uint16_t>((U32 + rounding_bias) >> 16);
-}
+using c10::detail::f32_from_bits;
+using c10::detail::round_to_nearest_even;
 } // namespace internal
-
-/**
- * The "brain floating-point" type, compatible with c10/util/BFloat16.h from
- * pytorch core.
- *
- * This representation uses 1 bit for the sign, 8 bits for the exponent and 7
- * bits for the mantissa.
- */
-struct alignas(2) BFloat16 {
-  uint16_t x;
-
-  BFloat16() = default;
-  struct from_bits_t {};
-  static constexpr from_bits_t from_bits() {
-    return from_bits_t();
-  }
-
-  constexpr BFloat16(unsigned short bits, from_bits_t) : x(bits) {}
-  /* implicit */ BFloat16(float value)
-      : x(internal::round_to_nearest_even(value)) {}
-  operator float() const {
-    return internal::f32_from_bits(x);
-  }
-};
-
-inline std::ostream& operator<<(std::ostream& out, const BFloat16& value) {
-  out << (float)value;
-  return out;
-}
-
-/// Arithmetic
-
-inline BFloat16 operator+(const BFloat16& a, const BFloat16& b) {
-  return static_cast<float>(a) + static_cast<float>(b);
-}
-
-inline BFloat16 operator-(const BFloat16& a, const BFloat16& b) {
-  return static_cast<float>(a) - static_cast<float>(b);
-}
-
-inline BFloat16 operator*(const BFloat16& a, const BFloat16& b) {
-  return static_cast<float>(a) * static_cast<float>(b);
-}
-
-inline BFloat16 operator/(const BFloat16& a, const BFloat16& b) {
-  return static_cast<float>(a) / static_cast<float>(b);
-}
-
-inline BFloat16 operator-(const BFloat16& a) {
-  return -static_cast<float>(a);
-}
-
-inline BFloat16& operator+=(BFloat16& a, const BFloat16& b) {
-  a = a + b;
-  return a;
-}
-
-inline BFloat16& operator-=(BFloat16& a, const BFloat16& b) {
-  a = a - b;
-  return a;
-}
-
-inline BFloat16& operator*=(BFloat16& a, const BFloat16& b) {
-  a = a * b;
-  return a;
-}
-
-inline BFloat16& operator/=(BFloat16& a, const BFloat16& b) {
-  a = a / b;
-  return a;
-}
-
-inline BFloat16& operator|(BFloat16& a, const BFloat16& b) {
-  a.x = a.x | b.x;
-  return a;
-}
-
-inline BFloat16& operator^(BFloat16& a, const BFloat16& b) {
-  a.x = a.x ^ b.x;
-  return a;
-}
-
-inline BFloat16& operator&(BFloat16& a, const BFloat16& b) {
-  a.x = a.x & b.x;
-  return a;
-}
-
-/// Arithmetic with floats
-
-inline float operator+(BFloat16 a, float b) {
-  return static_cast<float>(a) + b;
-}
-inline float operator-(BFloat16 a, float b) {
-  return static_cast<float>(a) - b;
-}
-inline float operator*(BFloat16 a, float b) {
-  return static_cast<float>(a) * b;
-}
-inline float operator/(BFloat16 a, float b) {
-  return static_cast<float>(a) / b;
-}
-
-inline float operator+(float a, BFloat16 b) {
-  return a + static_cast<float>(b);
-}
-inline float operator-(float a, BFloat16 b) {
-  return a - static_cast<float>(b);
-}
-inline float operator*(float a, BFloat16 b) {
-  return a * static_cast<float>(b);
-}
-inline float operator/(float a, BFloat16 b) {
-  return a / static_cast<float>(b);
-}
-
-inline float& operator+=(float& a, const BFloat16& b) {
-  return a += static_cast<float>(b);
-}
-inline float& operator-=(float& a, const BFloat16& b) {
-  return a -= static_cast<float>(b);
-}
-inline float& operator*=(float& a, const BFloat16& b) {
-  return a *= static_cast<float>(b);
-}
-inline float& operator/=(float& a, const BFloat16& b) {
-  return a /= static_cast<float>(b);
-}
-
-/// Arithmetic with doubles
-
-inline double operator+(BFloat16 a, double b) {
-  return static_cast<double>(a) + b;
-}
-inline double operator-(BFloat16 a, double b) {
-  return static_cast<double>(a) - b;
-}
-inline double operator*(BFloat16 a, double b) {
-  return static_cast<double>(a) * b;
-}
-inline double operator/(BFloat16 a, double b) {
-  return static_cast<double>(a) / b;
-}
-
-inline double operator+(double a, BFloat16 b) {
-  return a + static_cast<double>(b);
-}
-inline double operator-(double a, BFloat16 b) {
-  return a - static_cast<double>(b);
-}
-inline double operator*(double a, BFloat16 b) {
-  return a * static_cast<double>(b);
-}
-inline double operator/(double a, BFloat16 b) {
-  return a / static_cast<double>(b);
-}
-
-/// Arithmetic with ints
-
-inline BFloat16 operator+(BFloat16 a, int b) {
-  return a + static_cast<BFloat16>(b);
-}
-inline BFloat16 operator-(BFloat16 a, int b) {
-  return a - static_cast<BFloat16>(b);
-}
-inline BFloat16 operator*(BFloat16 a, int b) {
-  return a * static_cast<BFloat16>(b);
-}
-inline BFloat16 operator/(BFloat16 a, int b) {
-  return a / static_cast<BFloat16>(b);
-}
-
-inline BFloat16 operator+(int a, BFloat16 b) {
-  return static_cast<BFloat16>(a) + b;
-}
-inline BFloat16 operator-(int a, BFloat16 b) {
-  return static_cast<BFloat16>(a) - b;
-}
-inline BFloat16 operator*(int a, BFloat16 b) {
-  return static_cast<BFloat16>(a) * b;
-}
-inline BFloat16 operator/(int a, BFloat16 b) {
-  return static_cast<BFloat16>(a) / b;
-}
-
-//// Arithmetic with int64_t
-
-inline BFloat16 operator+(BFloat16 a, int64_t b) {
-  return a + static_cast<BFloat16>(b);
-}
-inline BFloat16 operator-(BFloat16 a, int64_t b) {
-  return a - static_cast<BFloat16>(b);
-}
-inline BFloat16 operator*(BFloat16 a, int64_t b) {
-  return a * static_cast<BFloat16>(b);
-}
-inline BFloat16 operator/(BFloat16 a, int64_t b) {
-  return a / static_cast<BFloat16>(b);
-}
-
-inline BFloat16 operator+(int64_t a, BFloat16 b) {
-  return static_cast<BFloat16>(a) + b;
-}
-inline BFloat16 operator-(int64_t a, BFloat16 b) {
-  return static_cast<BFloat16>(a) - b;
-}
-inline BFloat16 operator*(int64_t a, BFloat16 b) {
-  return static_cast<BFloat16>(a) * b;
-}
-inline BFloat16 operator/(int64_t a, BFloat16 b) {
-  return static_cast<BFloat16>(a) / b;
-}
-
-// Overloading < and > operators, because std::max and std::min use them.
-
-inline bool operator>(BFloat16& lhs, BFloat16& rhs) {
-  return float(lhs) > float(rhs);
-}
-
-inline bool operator<(BFloat16& lhs, BFloat16& rhs) {
-  return float(lhs) < float(rhs);
-}
-
-} // namespace etensor
-} // namespace runtime
-} // namespace executorch
+} // namespace executorch::runtime::etensor
 
 namespace torch {
 namespace executor {
@@ -270,74 +25,3 @@ namespace executor {
 using ::executorch::runtime::etensor::BFloat16;
 } // namespace executor
 } // namespace torch
-
-namespace std {
-
-template <>
-class numeric_limits<executorch::runtime::etensor::BFloat16> {
- public:
-  static constexpr bool is_signed = true;
-  static constexpr bool is_specialized = true;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_exact = false;
-  static constexpr bool has_infinity = true;
-  static constexpr bool has_quiet_NaN = true;
-  static constexpr bool has_signaling_NaN = true;
-  static constexpr auto has_denorm = numeric_limits<float>::has_denorm;
-  static constexpr auto has_denorm_loss =
-      numeric_limits<float>::has_denorm_loss;
-  static constexpr auto round_style = numeric_limits<float>::round_style;
-  static constexpr bool is_iec559 = false;
-  static constexpr bool is_bounded = true;
-  static constexpr bool is_modulo = false;
-  static constexpr int digits = 8;
-  static constexpr int digits10 = 2;
-  static constexpr int max_digits10 = 4;
-  static constexpr int radix = 2;
-  static constexpr int min_exponent = -125;
-  static constexpr int min_exponent10 = -37;
-  static constexpr int max_exponent = 128;
-  static constexpr int max_exponent10 = 38;
-  static constexpr auto traps = numeric_limits<float>::traps;
-  static constexpr auto tinyness_before =
-      numeric_limits<float>::tinyness_before;
-
-  static constexpr torch::executor::BFloat16 min() {
-    return torch::executor::BFloat16(
-        0x0080, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 lowest() {
-    return torch::executor::BFloat16(
-        0xFF7F, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 max() {
-    return torch::executor::BFloat16(
-        0x7F7F, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 epsilon() {
-    return torch::executor::BFloat16(
-        0x3C00, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 round_error() {
-    return torch::executor::BFloat16(
-        0x3F00, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 infinity() {
-    return torch::executor::BFloat16(
-        0x7F80, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 quiet_NaN() {
-    return torch::executor::BFloat16(
-        0x7FC0, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 signaling_NaN() {
-    return torch::executor::BFloat16(
-        0x7F80, torch::executor::BFloat16::from_bits());
-  }
-  static constexpr torch::executor::BFloat16 denorm_min() {
-    return torch::executor::BFloat16(
-        0x0001, torch::executor::BFloat16::from_bits());
-  }
-};
-
-} // namespace std
@@ -0,0 +1,8 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
Original file line number	Diff line number	Diff line change
`@@ -337,7 +337,9 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch")`
`337`	`337`	`"fix for this restriction."`
`338`	`338`	`)`
`339`	`339`	`endif()`
`340`		`-set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)`
	`340`	`+set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type)`
	`341`	`+# We don't need any of C10's CMake macros.`
	`342`	`+add_definitions(-DC10_USING_CUSTOM_GENERATED_MACROS)`
`341`	`343`
`342`	`344`	`#`
`343`	`345`	# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.