ROCm
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp
Lines changed: 5 additions & 4 deletions b/clang/lib/AST/ByteCode/Compiler.cpp
Lines changed: 5 additions & 4 deletions
diff --git a/clang/lib/AST/ByteCode/Compiler.h
Lines changed: 1 addition & 0 deletions b/clang/lib/AST/ByteCode/Compiler.h
Lines changed: 1 addition & 0 deletions
diff --git a/clang/lib/Headers/__clang_hip_cmath.h
Lines changed: 7 additions & 6 deletions b/clang/lib/Headers/__clang_hip_cmath.h
Lines changed: 7 additions & 6 deletions
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp
Lines changed: 10 additions & 0 deletions b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
Lines changed: 10 additions & 0 deletions
diff --git a/clang/test/CodeGen/AArch64/struct-coerce-using-ptr.cpp
Lines changed: 622 additions & 0 deletions b/clang/test/CodeGen/AArch64/struct-coerce-using-ptr.cpp
Lines changed: 622 additions & 0 deletions
diff --git a/clang/test/Headers/__clang_hip_cmath-return_types.hip
Lines changed: 18 additions & 18 deletions b/clang/test/Headers/__clang_hip_cmath-return_types.hip
Lines changed: 18 additions & 18 deletions
diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
Lines changed: 1 addition & 1 deletion b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake
Lines changed: 1 addition & 1 deletion b/compiler-rt/cmake/builtin-config-ix.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/compiler-rt/lib/builtins/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/compiler-rt/lib/builtins/aarch64/chkstk.S
Lines changed: 10 additions & 4 deletions b/compiler-rt/lib/builtins/aarch64/chkstk.S
Lines changed: 10 additions & 4 deletions
diff --git a/compiler-rt/lib/builtins/aarch64/lse.S
Lines changed: 2 additions & 2 deletions b/compiler-rt/lib/builtins/aarch64/lse.S
Lines changed: 2 additions & 2 deletions
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
Lines changed: 1 addition & 1 deletion b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
Lines changed: 1 addition & 1 deletion
diff --git a/compiler-rt/lib/builtins/clear_cache.c
Lines changed: 4 additions & 3 deletions b/compiler-rt/lib/builtins/clear_cache.c
Lines changed: 4 additions & 3 deletions
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c
Lines changed: 2 additions & 1 deletion b/compiler-rt/lib/builtins/cpu_model/aarch64.c
Lines changed: 2 additions & 1 deletion
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.h
Lines changed: 2 additions & 1 deletion b/compiler-rt/lib/builtins/cpu_model/aarch64.h
Lines changed: 2 additions & 1 deletion
diff --git a/compiler-rt/lib/builtins/fp_compare_impl.inc
Lines changed: 1 addition & 1 deletion b/compiler-rt/lib/builtins/fp_compare_impl.inc
Lines changed: 1 addition & 1 deletion
diff --git a/compiler-rt/lib/builtins/fp_lib.h
Lines changed: 1 addition & 1 deletion b/compiler-rt/lib/builtins/fp_lib.h
Lines changed: 1 addition & 1 deletion
diff --git a/compiler-rt/lib/builtins/udivmodti4.c
Lines changed: 1 addition & 1 deletion b/compiler-rt/lib/builtins/udivmodti4.c
Lines changed: 1 addition & 1 deletion
diff --git a/compiler-rt/test/builtins/Unit/enable_execute_stack_test.c
Lines changed: 13 additions & 0 deletions b/compiler-rt/test/builtins/Unit/enable_execute_stack_test.c
Lines changed: 13 additions & 0 deletions
diff --git a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
Lines changed: 2 additions & 2 deletions b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
Lines changed: 2 additions & 2 deletions
diff --git a/compiler-rt/test/builtins/Unit/multc3_test.c
Lines changed: 2 additions & 2 deletions b/compiler-rt/test/builtins/Unit/multc3_test.c
Lines changed: 2 additions & 2 deletions
@@ -474,10 +474,6 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
       return false;
     return this->emitDecayPtr(*FromT, *ToT, CE);
   }
-
-  case CK_LValueToRValueBitCast:
-    return this->emitBuiltinBitCast(CE);
-
   case CK_IntegralToBoolean:
   case CK_FixedPointToBoolean: {
     // HLSL uses this to cast to one-element vectors.
@@ -735,6 +731,11 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
   llvm_unreachable("Unhandled clang::CastKind enum");
 }
 
+template <class Emitter>
+bool Compiler<Emitter>::VisitBuiltinBitCastExpr(const BuiltinBitCastExpr *E) {
+  return this->emitBuiltinBitCast(E);
+}
+
 template <class Emitter>
 bool Compiler<Emitter>::VisitIntegerLiteral(const IntegerLiteral *LE) {
   if (DiscardResult)
 
@@ -126,6 +126,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
 
   // Expressions.
   bool VisitCastExpr(const CastExpr *E);
+  bool VisitBuiltinBitCastExpr(const BuiltinBitCastExpr *E);
   bool VisitIntegerLiteral(const IntegerLiteral *E);
   bool VisitFloatingLiteral(const FloatingLiteral *E);
   bool VisitImaginaryLiteral(const ImaginaryLiteral *E);
 
@@ -466,12 +466,13 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
 #if __cplusplus >= 201103L
 #define __HIP_OVERLOAD2(__retty, __fn)                                         \
   template <typename __T1, typename __T2>                                      \
-  __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<                           \
-      __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value,  \
-      typename __hip::__promote<__T1, __T2>::type>::type                       \
-  __fn(__T1 __x, __T2 __y) {                                                   \
-    typedef typename __hip::__promote<__T1, __T2>::type __result_type;         \
-    return __fn((__result_type)__x, (__result_type)__y);                       \
+  __DEVICE__ __CONSTEXPR__                                                     \
+      typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&            \
+                                   __hip::is_arithmetic<__T2>::value,          \
+                               __retty>::type                                  \
+      __fn(__T1 __x, __T2 __y) {                                               \
+    typedef typename __hip::__promote<__T1, __T2>::type __arg_type;            \
+    return __fn((__arg_type)__x, (__arg_type)__y);                             \
   }
 #else
 #define __HIP_OVERLOAD2(__retty, __fn)                                         \
 
@@ -503,6 +503,16 @@ namespace OversizedBitField {
 #endif
 }
 
+namespace Discarded {
+  enum my_byte : unsigned char {};
+  struct pad {
+    char a;
+    int b;
+  };
+  constexpr int bad_my_byte = (__builtin_bit_cast(my_byte[8], pad{1, 2}), 0); // both-error {{must be initialized by a constant expression}} \
+                                                                              // both-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte';}}
+}
+
 typedef bool bool9 __attribute__((ext_vector_type(9)));
 // both-error@+2 {{constexpr variable 'bad_bool9_to_short' must be initialized by a constant expression}}
 // both-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}}
 
@@ -376,19 +376,19 @@ __device__ void test_isnormal()
 __device__ void test_isgreater()
 {
     static_assert(is_same<decltype(isgreater((float)0, (float)0)), bool>::value, "");
-    static_assert(is_same<decltype(isgreater((float)0, (double)0)), double>::value, "");
-    static_assert(is_same<decltype(isgreater((double)0, (float)0)), double>::value, "");
+    static_assert(is_same<decltype(isgreater((float)0, (double)0)), bool>::value, "");
+    static_assert(is_same<decltype(isgreater((double)0, (float)0)), bool>::value, "");
     static_assert(is_same<decltype(isgreater((double)0, (double)0)), bool>::value, "");
-    static_assert(is_same<decltype(isgreater(0, (double)0)), double>::value, "");
+    static_assert(is_same<decltype(isgreater(0, (double)0)), bool>::value, "");
 }
 
 __device__ void test_isgreaterequal()
 {
     static_assert(is_same<decltype(isgreaterequal((float)0, (float)0)), bool>::value, "");
-    static_assert(is_same<decltype(isgreaterequal((float)0, (double)0)), double>::value, "");
-    static_assert(is_same<decltype(isgreaterequal((double)0, (float)0)), double>::value, "");
+    static_assert(is_same<decltype(isgreaterequal((float)0, (double)0)), bool>::value, "");
+    static_assert(is_same<decltype(isgreaterequal((double)0, (float)0)), bool>::value, "");
     static_assert(is_same<decltype(isgreaterequal((double)0, (double)0)), bool>::value, "");
-    static_assert(is_same<decltype(isgreaterequal(0, (double)0)), double>::value, "");
+    static_assert(is_same<decltype(isgreaterequal(0, (double)0)), bool>::value, "");
 }
 
 __device__ void test_isinf()
@@ -400,28 +400,28 @@ __device__ void test_isinf()
 __device__ void test_isless()
 {
     static_assert(is_same<decltype(isless((float)0, (float)0)), bool>::value, "");
-    static_assert(is_same<decltype(isless((float)0, (double)0)), double>::value, "");
-    static_assert(is_same<decltype(isless((double)0, (float)0)), double>::value, "");
+    static_assert(is_same<decltype(isless((float)0, (double)0)), bool>::value, "");
+    static_assert(is_same<decltype(isless((double)0, (float)0)), bool>::value, "");
     static_assert(is_same<decltype(isless((double)0, (double)0)), bool>::value, "");
-    static_assert(is_same<decltype(isless(0, (double)0)), double>::value, "");
+    static_assert(is_same<decltype(isless(0, (double)0)), bool>::value, "");
 }
 
 __device__ void test_islessequal()
 {
     static_assert(is_same<decltype(islessequal((float)0, (float)0)), bool>::value, "");
-    static_assert(is_same<decltype(islessequal((float)0, (double)0)), double>::value, "");
-    static_assert(is_same<decltype(islessequal((double)0, (float)0)), double>::value, "");
+    static_assert(is_same<decltype(islessequal((float)0, (double)0)), bool>::value, "");
+    static_assert(is_same<decltype(islessequal((double)0, (float)0)), bool>::value, "");
     static_assert(is_same<decltype(islessequal((double)0, (double)0)), bool>::value, "");
-    static_assert(is_same<decltype(islessequal(0, (double)0)), double>::value, "");
+    static_assert(is_same<decltype(islessequal(0, (double)0)), bool>::value, "");
 }
 
 __device__ void test_islessgreater()
 {
     static_assert(is_same<decltype(islessgreater((float)0, (float)0)), bool>::value, "");
-    static_assert(is_same<decltype(islessgreater((float)0, (double)0)), double>::value, "");
-    static_assert(is_same<decltype(islessgreater((double)0, (float)0)), double>::value, "");
+    static_assert(is_same<decltype(islessgreater((float)0, (double)0)), bool>::value, "");
+    static_assert(is_same<decltype(islessgreater((double)0, (float)0)), bool>::value, "");
     static_assert(is_same<decltype(islessgreater((double)0, (double)0)), bool>::value, "");
-    static_assert(is_same<decltype(islessgreater(0, (double)0)), double>::value, "");
+    static_assert(is_same<decltype(islessgreater(0, (double)0)), bool>::value, "");
 }
 
 __device__ void test_isnan()
@@ -433,10 +433,10 @@ __device__ void test_isnan()
 __device__ void test_isunordered()
 {
     static_assert(is_same<decltype(isunordered((float)0, (float)0)), bool>::value, "");
-    static_assert(is_same<decltype(isunordered((float)0, (double)0)), double>::value, "");
-    static_assert(is_same<decltype(isunordered((double)0, (float)0)), double>::value, "");
+    static_assert(is_same<decltype(isunordered((float)0, (double)0)), bool>::value, "");
+    static_assert(is_same<decltype(isunordered((double)0, (float)0)), bool>::value, "");
     static_assert(is_same<decltype(isunordered((double)0, (double)0)), bool>::value, "");
-    static_assert(is_same<decltype(isunordered(0, (double)0)), double>::value, "");
+    static_assert(is_same<decltype(isunordered(0, (double)0)), bool>::value, "");
 }
 
 __device__ void test_acosh()
 
@@ -123,7 +123,7 @@ macro(set_output_name output name arch)
   else()
     if(ANDROID AND ${arch} STREQUAL "i386")
       set(${output} "${name}-i686${COMPILER_RT_OS_SUFFIX}")
-    elseif("${arch}" MATCHES "^arm")
+    elseif(NOT "${arch}" MATCHES "^arm64" AND "${arch}" MATCHES "^arm")
       if(COMPILER_RT_DEFAULT_TARGET_ONLY)
         set(triple "${COMPILER_RT_DEFAULT_TARGET_TRIPLE}")
       else()
 
@@ -59,7 +59,7 @@ else()
 endif()
 
 set(AMDGPU amdgcn)
-set(ARM64 aarch64)
+set(ARM64 aarch64 arm64ec)
 set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
 set(AVR avr)
 set(HEXAGON hexagon)
 
@@ -668,6 +668,7 @@ set(armv7k_SOURCES ${arm_SOURCES})
 set(arm64_SOURCES ${aarch64_SOURCES})
 set(arm64e_SOURCES ${aarch64_SOURCES})
 set(arm64_32_SOURCES ${aarch64_SOURCES})
+set(arm64ec_SOURCES ${aarch64_SOURCES})
 
 # macho_embedded archs
 set(armv6m_SOURCES ${thumb1_SOURCES})
 
@@ -15,12 +15,18 @@
 //      bl      __chkstk
 //      sub     sp, sp, x15, lsl #4
 
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__arm64ec__)
+
+#ifdef __arm64ec__
+#define CHKSTK_FUNC __chkstk_arm64ec
+#else
+#define CHKSTK_FUNC __chkstk
+#endif
 
 #define PAGE_SIZE 4096
 
         .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__chkstk)
+DEFINE_COMPILERRT_FUNCTION(CHKSTK_FUNC)
         lsl    x16, x15, #4
         mov    x17, sp
 1:
@@ -30,6 +36,6 @@ DEFINE_COMPILERRT_FUNCTION(__chkstk)
         b.gt   1b
 
         ret
-END_COMPILERRT_FUNCTION(__chkstk)
+END_COMPILERRT_FUNCTION(CHKSTK_FUNC)
 
-#endif // __aarch64__
+#endif // defined(__aarch64__) || defined(__arm64ec__)
@@ -20,7 +20,7 @@
 // Routines may modify temporary registers tmp0, tmp1, tmp2,
 // return value x0 and the flags only.
 
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__arm64ec__)
 
 #ifdef HAS_ASM_LSE
 .arch armv8-a+lse
@@ -267,4 +267,4 @@ NO_EXEC_STACK_DIRECTIVE
 // GNU property note for BTI and PAC
 GNU_PROPERTY_BTI_PAC
 
-#endif // __aarch64__
+#endif // defined(__aarch64__) || defined(__arm64ec__)
@@ -235,7 +235,7 @@ END_COMPILERRT_FUNCTION(__arm_sc_memcpy)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
 
 // This version uses FP registers. Use this only on targets with them
-#if defined(__aarch64__) && __ARM_FP != 0
+#if (defined(__aarch64__) && __ARM_FP != 0) || defined(__arm64ec__)
 //
 //  __arm_sc_memset
 //
 
@@ -59,13 +59,14 @@ uintptr_t GetCurrentProcess(void);
 // specified range.
 
 void __clear_cache(void *start, void *end) {
-#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
+#if defined(_WIN32) &&                                                         \
+    (defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__))
+  FlushInstructionCache(GetCurrentProcess(), start, end - start);
+#elif __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
 // Intel processors have a unified instruction and data cache
 // so there is nothing to do
 #elif defined(__s390__)
 // no-op
-#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
-  FlushInstructionCache(GetCurrentProcess(), start, end - start);
 #elif defined(__arm__) && !defined(__APPLE__)
 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
   struct arm_sync_icache_args arg;
 
@@ -14,7 +14,8 @@
 
 #include "aarch64.h"
 
-#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
+#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) &&      \
+    !defined(__arm64ec__) && !defined(_M_ARM64EC)
 #error This file is intended only for aarch64-based targets
 #endif
 
 
@@ -8,7 +8,8 @@
 
 #include "cpu_model.h"
 
-#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
+#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) &&      \
+    !defined(__arm64ec__) && !defined(_M_ARM64EC)
 #error This file is intended only for aarch64-based targets
 #endif
 
 
@@ -12,7 +12,7 @@
 // functions. We need to ensure that the return value is sign-extended in the
 // same way as GCC expects (since otherwise GCC-generated __builtin_isinf
 // returns true for finite 128-bit floating-point numbers).
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__arm64ec__)
 // AArch64 GCC overrides libgcc_cmp_return to use int instead of long.
 typedef int CMP_RESULT;
 #elif __SIZEOF_POINTER__ == 8 && __SIZEOF_LONG__ == 4
 
@@ -359,7 +359,7 @@ static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) {
   return __compiler_rt_scalbnX(x, y);
 }
 static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__arm64ec__)
   // Use __builtin_fmax which turns into an fmaxnm instruction on AArch64.
   return __builtin_fmax(x, y);
 #else
 
@@ -83,7 +83,7 @@ static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
 
 static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
                                      du_int *r) {
-#if defined(__x86_64__)
+#if defined(__x86_64__) && !defined(__arm64ec__)
   du_int result;
   __asm__("divq %[v]"
           : "=a"(result), "=d"(*r)
 
@@ -10,9 +10,22 @@ extern void __enable_execute_stack(void* addr);
 
 typedef int (*pfunc)(void);
 
+#ifdef __arm64ec__
+// On ARM64EC, we need the x86_64 version of this function, but the compiler
+// would normally generate the AArch64 variant, so we hardcode it here.
+static char func1[] = {
+    0xb8, 0x01, 0x00, 0x00, 0x00, // movl    $0x1, %eax
+    0xc3                          // retq
+};
+static char func2[] = {
+    0xb8, 0x02, 0x00, 0x00, 0x00, // movl    $0x2, %eax
+    0xc3                          // retq
+};
+#else
 // Make these static to avoid ILT jumps for incremental linking on Windows.
 static int func1() { return 1; }
 static int func2() { return 2; }
+#endif
 
 void *__attribute__((noinline))
 memcpy_f(void *dst, const void *src, size_t n) {
 
@@ -4,7 +4,7 @@
 
 #include <stdio.h>
 
-#if _ARCH_PPC || __aarch64__
+#if _ARCH_PPC || __aarch64__ || __arm64ec__
 
 #include "int_lib.h"
 
@@ -35,7 +35,7 @@ char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0};
 
 int main()
 {
-#if _ARCH_PPC || __aarch64__
+#if _ARCH_PPC || __aarch64__ || __arm64ec__
     if (test__fixunstfdi(0.0, 0))
         return 1;
 
 
@@ -4,7 +4,7 @@
 
 #include <stdio.h>
 
-#if _ARCH_PPC || __aarch64__
+#if _ARCH_PPC || __aarch64__ || __arm64ec__
 
 #include "int_lib.h"
 #include <math.h>
@@ -348,7 +348,7 @@ long double x[][2] =
 
 int main()
 {
-#if _ARCH_PPC || __aarch64__
+#if _ARCH_PPC || __aarch64__ || __arm64ec__
     const unsigned N = sizeof(x) / sizeof(x[0]);
     unsigned i, j;
     for (i = 0; i < N; ++i)
Original file line number	Diff line number	Diff line change
`@@ -359,7 +359,7 @@ static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) {`
`359`	`359`	`return __compiler_rt_scalbnX(x, y);`
`360`	`360`	`}`
`361`	`361`	`static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {`
`362`		`-#if defined(__aarch64__)`
	`362`	`+#if defined(__aarch64__) \|\| defined(__arm64ec__)`
`363`	`363`	`// Use __builtin_fmax which turns into an fmaxnm instruction on AArch64.`
`364`	`364`	`return __builtin_fmax(x, y);`
`365`	`365`	`#else`