Skip to content

[compiler-rt] Add initial ARM64EC builtins support #139279

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler-rt/cmake/Modules/AddCompilerRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ macro(set_output_name output name arch)
else()
if(ANDROID AND ${arch} STREQUAL "i386")
set(${output} "${name}-i686${COMPILER_RT_OS_SUFFIX}")
elseif("${arch}" MATCHES "^arm")
elseif(NOT "${arch}" MATCHES "^arm64" AND "${arch}" MATCHES "^arm")
if(COMPILER_RT_DEFAULT_TARGET_ONLY)
set(triple "${COMPILER_RT_DEFAULT_TARGET_TRIPLE}")
else()
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/cmake/builtin-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ else()
endif()

set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM64 aarch64 arm64ec)
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
set(AVR avr)
set(HEXAGON hexagon)
Expand Down
1 change: 1 addition & 0 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,7 @@ set(armv7k_SOURCES ${arm_SOURCES})
set(arm64_SOURCES ${aarch64_SOURCES})
set(arm64e_SOURCES ${aarch64_SOURCES})
set(arm64_32_SOURCES ${aarch64_SOURCES})
set(arm64ec_SOURCES ${aarch64_SOURCES})

# macho_embedded archs
set(armv6m_SOURCES ${thumb1_SOURCES})
Expand Down
14 changes: 10 additions & 4 deletions compiler-rt/lib/builtins/aarch64/chkstk.S
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@
// bl __chkstk
// sub sp, sp, x15, lsl #4

#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)

#ifdef __arm64ec__
#define CHKSTK_FUNC __chkstk_arm64ec
#else
#define CHKSTK_FUNC __chkstk
#endif

#define PAGE_SIZE 4096

.p2align 2
DEFINE_COMPILERRT_FUNCTION(__chkstk)
DEFINE_COMPILERRT_FUNCTION(CHKSTK_FUNC)
lsl x16, x15, #4
mov x17, sp
1:
Expand All @@ -30,6 +36,6 @@ DEFINE_COMPILERRT_FUNCTION(__chkstk)
b.gt 1b

ret
END_COMPILERRT_FUNCTION(__chkstk)
END_COMPILERRT_FUNCTION(CHKSTK_FUNC)

#endif // __aarch64__
#endif // defined(__aarch64__) || defined(__arm64ec__)
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/aarch64/lse.S
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
// Routines may modify temporary registers tmp0, tmp1, tmp2,
// return value x0 and the flags only.

#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)

#ifdef HAS_ASM_LSE
.arch armv8-a+lse
Expand Down Expand Up @@ -267,4 +267,4 @@ NO_EXEC_STACK_DIRECTIVE
// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC

#endif // __aarch64__
#endif // defined(__aarch64__) || defined(__arm64ec__)
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ END_COMPILERRT_FUNCTION(__arm_sc_memcpy)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)

// This version uses FP registers. Use this only on targets with them
#if defined(__aarch64__) && __ARM_FP != 0
#if (defined(__aarch64__) && __ARM_FP != 0) || defined(__arm64ec__)
//
// __arm_sc_memset
//
Expand Down
7 changes: 4 additions & 3 deletions compiler-rt/lib/builtins/clear_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ uintptr_t GetCurrentProcess(void);
// specified range.

void __clear_cache(void *start, void *end) {
#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
#if defined(_WIN32) && \
(defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
// Intel processors have a unified instruction and data cache
// so there is nothing to do
#elif defined(__s390__)
// no-op
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
struct arm_sync_icache_args arg;
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/builtins/cpu_model/aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

#include "aarch64.h"

#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) && \
!defined(__arm64ec__) && !defined(_M_ARM64EC)
#error This file is intended only for aarch64-based targets
#endif

Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/builtins/cpu_model/aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "cpu_model.h"

#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) && \
!defined(__arm64ec__) && !defined(_M_ARM64EC)
#error This file is intended only for aarch64-based targets
#endif

Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/fp_compare_impl.inc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// functions. We need to ensure that the return value is sign-extended in the
// same way as GCC expects (since otherwise GCC-generated __builtin_isinf
// returns true for finite 128-bit floating-point numbers).
#ifdef __aarch64__
#if defined(__aarch64__) || defined(__arm64ec__)
// AArch64 GCC overrides libgcc_cmp_return to use int instead of long.
typedef int CMP_RESULT;
#elif __SIZEOF_POINTER__ == 8 && __SIZEOF_LONG__ == 4
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/fp_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) {
return __compiler_rt_scalbnX(x, y);
}
static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(__arm64ec__)
// Use __builtin_fmax which turns into an fmaxnm instruction on AArch64.
return __builtin_fmax(x, y);
#else
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/udivmodti4.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,

static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
du_int *r) {
#if defined(__x86_64__)
#if defined(__x86_64__) && !defined(__arm64ec__)
du_int result;
__asm__("divq %[v]"
: "=a"(result), "=d"(*r)
Expand Down
13 changes: 13 additions & 0 deletions compiler-rt/test/builtins/Unit/enable_execute_stack_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,22 @@ extern void __enable_execute_stack(void* addr);

typedef int (*pfunc)(void);

#ifdef __arm64ec__
// On ARM64EC, we need the x86_64 version of this function, but the compiler
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing this for all of x86_64 seems a bit far-reaching; wouldn't it be cleaner to restrict this case to arm64ec? Then again, extending it to all of x86_64 does give it more coverage and exposure, but ideally I wouldn't be touching the behaviour for other architectures in a patch like this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this to use __arm64ec__.

// would normally generate the AArch64 variant, so we hardcode it here.
static char func1[] = {
0xb8, 0x01, 0x00, 0x00, 0x00, // movl $0x1, %eax
0xc3 // retq
};
static char func2[] = {
0xb8, 0x02, 0x00, 0x00, 0x00, // movl $0x2, %eax
0xc3 // retq
};
#else
// Make these static to avoid ILT jumps for incremental linking on Windows.
static int func1() { return 1; }
static int func2() { return 2; }
#endif

void *__attribute__((noinline))
memcpy_f(void *dst, const void *src, size_t n) {
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/test/builtins/Unit/fixunstfdi_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <stdio.h>

#if _ARCH_PPC || __aarch64__
#if _ARCH_PPC || __aarch64__ || __arm64ec__

#include "int_lib.h"

Expand Down Expand Up @@ -35,7 +35,7 @@ char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0};

int main()
{
#if _ARCH_PPC || __aarch64__
#if _ARCH_PPC || __aarch64__ || __arm64ec__
if (test__fixunstfdi(0.0, 0))
return 1;

Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/test/builtins/Unit/multc3_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <stdio.h>

#if _ARCH_PPC || __aarch64__
#if _ARCH_PPC || __aarch64__ || __arm64ec__

#include "int_lib.h"
#include <math.h>
Expand Down Expand Up @@ -348,7 +348,7 @@ long double x[][2] =

int main()
{
#if _ARCH_PPC || __aarch64__
#if _ARCH_PPC || __aarch64__ || __arm64ec__
const unsigned N = sizeof(x) / sizeof(x[0]);
unsigned i, j;
for (i = 0; i < N; ++i)
Expand Down
Loading