Skip to content

[ARM] arm_acle.h add Coprocessor Instrinsics #75440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 9, 2024

Conversation

hstk30-hw
Copy link
Contributor

@hstk30-hw hstk30-hw commented Dec 14, 2023

#75424

Add Coprocessor Instrinsics

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Dec 14, 2023
@llvmbot
Copy link
Member

llvmbot commented Dec 14, 2023

@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-clang

Author: None (hstk30-hw)

Changes

#75424

Add Corprocessor Instrinsics


Full diff: https://github.com/llvm/llvm-project/pull/75440.diff

1 Files Affected:

  • (modified) clang/lib/Headers/arm_acle.h (+48-1)
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 61d80258d166a1..9c709550ccc45b 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -752,10 +752,57 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 
 /* Memory Operations Intrinsics */
-#define __arm_mops_memset_tag(__tagged_address, __value, __size)    \
+#define __arm_mops_memset_tag(__tagged_address, __value, __size)               \
   __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
 #endif
 
+/* Coprocessor Intrinsics */
+#if (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4
+#define __arm_cdp(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)               \
+  __builtin_arm_cdp(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)
+#define __arm_ldc(__coproc, __CRd, __p) __builtin_arm_ldc(__coproc, __CRd, __p)
+#define __arm_ldcl(__coproc, __CRd, __p)                                       \
+  __builtin_arm_ldcl(__coproc, __CRd, __p)
+#define __arm_stc(__coproc, __CRd, __p) __builtin_arm_stc(__coproc, __CRd, __p)
+#define __arm_stcl(__coproc, __CRd, __p)                                       \
+  __builtin_arm_stcl(__coproc, __CRd, __p)
+#define __arm_mcr(__coproc, __opc1, __value, __CRn, __CRm, __opc2)             \
+  __builtin_arm_mcr(__coproc, __opc1, __value, __CRn, __CRm, __opc2)
+#define __arm_mrc(__coproc, __opc1, __CRn, __CRm, __opc2)                      \
+  __builtin_arm_mrc(__coproc, __opc1, __CRn, __CRm, __opc2)
+
+#if __ARM_ARCH >= 5
+#define __arm_cdp2(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)              \
+  __builtin_arm_cdp2(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)
+#define __arm_ldc2(__coproc, __CRd, __p)                                       \
+  __builtin_arm_ldc2(__coproc, __CRd, __p)
+#define __arm_ldc2l(__coproc, __CRd, __p)                                      \
+  __builtin_arm_ldc2l(__coproc, __CRd, __p)
+#define __arm_stc2(__coproc, __CRd, __p)                                       \
+  __builtin_arm_stc2(__coproc, __CRd, __p)
+#define __arm_stc2l(__coproc, __CRd, __p)                                      \
+  __builtin_arm_stc2l(__coproc, __CRd, __p)
+#define __arm_mcr2(__coproc, __opc1, __value, __CRn, __CRm, __opc2)            \
+  __builtin_arm_mcr2(__coproc, __opc1, __value, __CRn, __CRm, __opc2)
+#define __arm_mrc2(__coproc, __opc1, __CRn, __CRm, __opc2)                     \
+  __builtin_arm_mrc2(__coproc, __opc1, __CRn, __CRm, __opc2)
+
+#if __ARM_ARCH >= 6 || defined(__ARM_ARCH_5TE__)
+#define __arm_mcrr(__coproc, __opc1, __value, __CRm)                           \
+  __builtin_arm_mcrr(__coproc, __opc1, __value, __CRm)
+#define __arm_mrrc(__coproc, __opc1, __CRm)                                    \
+  __builtin_arm_mrrc(__coproc, __opc1, __CRm)
+
+#if __ARM_ARCH >= 6
+#define __arm_mcrr2(__coproc, __opc1, __value, __CRm)                          \
+  __builtin_arm_mcrr2(__coproc, __opc1, __value, __CRm)
+#define __arm_mrrc2(__coproc, __opc1, __CRm)                                   \
+  __builtin_arm_mrrc2(__coproc, __opc1, __CRm)
+#endif /* __ARM_ARCH >= 6.  */
+#endif /* __ARM_ARCH >= 6 ||  defined (__ARM_ARCH_5TE__).  */
+#endif /*  __ARM_ARCH >= 5.  */
+#endif /* (!__thumb__ || __thumb2__) &&  __ARM_ARCH >= 4.  */
+
 /* Transactional Memory Extension (TME) Intrinsics */
 #if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
 

@llvmbot
Copy link
Member

llvmbot commented Dec 14, 2023

@llvm/pr-subscribers-backend-x86

Author: None (hstk30-hw)

Changes

#75424

Add Corprocessor Instrinsics


Full diff: https://github.com/llvm/llvm-project/pull/75440.diff

1 Files Affected:

  • (modified) clang/lib/Headers/arm_acle.h (+48-1)
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 61d80258d166a1..9c709550ccc45b 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -752,10 +752,57 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 
 /* Memory Operations Intrinsics */
-#define __arm_mops_memset_tag(__tagged_address, __value, __size)    \
+#define __arm_mops_memset_tag(__tagged_address, __value, __size)               \
   __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
 #endif
 
+/* Coprocessor Intrinsics */
+#if (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4
+#define __arm_cdp(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)               \
+  __builtin_arm_cdp(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)
+#define __arm_ldc(__coproc, __CRd, __p) __builtin_arm_ldc(__coproc, __CRd, __p)
+#define __arm_ldcl(__coproc, __CRd, __p)                                       \
+  __builtin_arm_ldcl(__coproc, __CRd, __p)
+#define __arm_stc(__coproc, __CRd, __p) __builtin_arm_stc(__coproc, __CRd, __p)
+#define __arm_stcl(__coproc, __CRd, __p)                                       \
+  __builtin_arm_stcl(__coproc, __CRd, __p)
+#define __arm_mcr(__coproc, __opc1, __value, __CRn, __CRm, __opc2)             \
+  __builtin_arm_mcr(__coproc, __opc1, __value, __CRn, __CRm, __opc2)
+#define __arm_mrc(__coproc, __opc1, __CRn, __CRm, __opc2)                      \
+  __builtin_arm_mrc(__coproc, __opc1, __CRn, __CRm, __opc2)
+
+#if __ARM_ARCH >= 5
+#define __arm_cdp2(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)              \
+  __builtin_arm_cdp2(__coproc, __opc1, __CRd, __CRn, __CRm, __opc2)
+#define __arm_ldc2(__coproc, __CRd, __p)                                       \
+  __builtin_arm_ldc2(__coproc, __CRd, __p)
+#define __arm_ldc2l(__coproc, __CRd, __p)                                      \
+  __builtin_arm_ldc2l(__coproc, __CRd, __p)
+#define __arm_stc2(__coproc, __CRd, __p)                                       \
+  __builtin_arm_stc2(__coproc, __CRd, __p)
+#define __arm_stc2l(__coproc, __CRd, __p)                                      \
+  __builtin_arm_stc2l(__coproc, __CRd, __p)
+#define __arm_mcr2(__coproc, __opc1, __value, __CRn, __CRm, __opc2)            \
+  __builtin_arm_mcr2(__coproc, __opc1, __value, __CRn, __CRm, __opc2)
+#define __arm_mrc2(__coproc, __opc1, __CRn, __CRm, __opc2)                     \
+  __builtin_arm_mrc2(__coproc, __opc1, __CRn, __CRm, __opc2)
+
+#if __ARM_ARCH >= 6 || defined(__ARM_ARCH_5TE__)
+#define __arm_mcrr(__coproc, __opc1, __value, __CRm)                           \
+  __builtin_arm_mcrr(__coproc, __opc1, __value, __CRm)
+#define __arm_mrrc(__coproc, __opc1, __CRm)                                    \
+  __builtin_arm_mrrc(__coproc, __opc1, __CRm)
+
+#if __ARM_ARCH >= 6
+#define __arm_mcrr2(__coproc, __opc1, __value, __CRm)                          \
+  __builtin_arm_mcrr2(__coproc, __opc1, __value, __CRm)
+#define __arm_mrrc2(__coproc, __opc1, __CRm)                                   \
+  __builtin_arm_mrrc2(__coproc, __opc1, __CRm)
+#endif /* __ARM_ARCH >= 6.  */
+#endif /* __ARM_ARCH >= 6 ||  defined (__ARM_ARCH_5TE__).  */
+#endif /*  __ARM_ARCH >= 5.  */
+#endif /* (!__thumb__ || __thumb2__) &&  __ARM_ARCH >= 4.  */
+
 /* Transactional Memory Extension (TME) Intrinsics */
 #if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
 

@vhscampos
Copy link
Member

There are some typos in the commit message and in the PR title.

@hstk30-hw hstk30-hw changed the title [ARM] arm_acle.h add Corprocessor Instrinsics [ARM] arm_acle.h add Coprocessor Instrinsics Dec 19, 2023
@hstk30-hw hstk30-hw force-pushed the acle-add-copro-instr branch from 12fc8ec to fdb2b45 Compare December 19, 2023 01:12
@davemgreen
Copy link
Collaborator

It looks like there is a downstream implementation of this that was never upstreamed. Perhaps someone can fish it out for you to show how it looked? It might be using the wrong predefined macro, but does have some tests.

@hstk30-hw
Copy link
Contributor Author

@davemgreen
Copy link
Collaborator

Let me try and get the downstream version, you might be able to pick up some things from it. A test at least should probably be present.

@davemgreen
Copy link
Collaborator

This is the downstream code we have: https://gist.github.com/davemgreen/e7ade833274a60e975e67a66eda7cb44
Note that the __ARM_TARGET_COPROC_XYZ macros are probably wrong. They should be __ARM_FEATURE_COPROC bitfield macros according to the ACLE.

Can you make use of some of that? It would be good to add the macro definition at the same time as the intrinsics (they can be used to control when the intrinsics are available), and the test should be useful for checking they are available at the right times.

@hstk30-hw hstk30-hw force-pushed the acle-add-copro-instr branch from fdb2b45 to 5a746e9 Compare December 22, 2023 13:29
@llvmbot llvmbot added backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Dec 22, 2023
Copy link

github-actions bot commented Dec 22, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@davemgreen
Copy link
Collaborator

Thanks for doing this.
I think that __ARM_FEATURE_COPROC should be a bitfield, as defined in https://arm-software.github.io/acle/main/acle.html#coprocessor-intrinsics. That would remove the need for the other macros.

void ldc(int i) {
__arm_ldc(1, 2, &i);
// CHECK-V4: __builtin_arm_ldc
// CHECK-V4-NOT: __builtin_arm_ldc
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plz check it, maybe a typo for V4-THUMB @davemgreen

@hstk30-hw
Copy link
Contributor Author

Use bitfield still a mess up because the different arm version have different Instrinsics available even in the same bit group.
I will try it, hope it readable.

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. This is looking good to me. I just have a few comments about different architecture revisions.

case llvm::ARM::ArchKind::ARMV9_1A:
case llvm::ARM::ArchKind::ARMV9_2A:
case llvm::ARM::ArchKind::ARMV9_3A:
case llvm::ARM::ArchKind::ARMV9_4A:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a ARMV9_5A now too. I think I would expect these to be the same as ARMV8.
Is this switch statement exhaustive? Could the default case be made the same as ARMV8 so we don't need to extend it every time an architecture is added?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need defined ARMV9_5A in ARMTargetParser.def first.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this https://gist.github.com/davemgreen/e7ade833274a60e975e67a66eda7cb44, not have test cases on ARMV9. According to the code logic, ARMV9 seem support all Coprocessor Instrinsics. This is different from ARMV8, I'm not sure, so the default case is 0 for now.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, ARMV9_5A is AArch64 only. That's OK then.
I would expect the other ArmV9-A cases to be the same as ArmV8-A for AArch32, and wouldn't have expected a change in coprocessor instructions.
The reference manual is at https://developer.arm.com/documentation/ddi0487/ja/?lang=en and doesn't seem to mention cdp.

@davemgreen
Copy link
Collaborator

If you can make armv9-a work the same as armv8-a and add some tests for it then this LGTM

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. LGTM

@hstk30-hw
Copy link
Contributor Author

There are some typos in the commit message and in the PR title.

@vhscampos Check again. Fixed the typos.

Copy link
Member

@vhscampos vhscampos left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thanks

@hstk30-hw hstk30-hw merged commit 4f68ee3 into llvm:main Jan 9, 2024
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:ARM clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants