-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLVM][MC][AArch64] Assembler support for Armv9.6-A memory systems extensions #112341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Nashe Mncube (nasherm) ChangesAdd support for the following Armv9.6-A memory systems extensions: as documented here: Patch is 81.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112341.diff 23 Files Affected:
diff --git a/clang/test/Driver/aarch64-v96a.c b/clang/test/Driver/aarch64-v96a.c
index 0aaadddb2842f8..fd24585acf24f2 100644
--- a/clang/test/Driver/aarch64-v96a.c
+++ b/clang/test/Driver/aarch64-v96a.c
@@ -17,3 +17,16 @@
// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"
//
// ===== Features supported on aarch64 =====
+//
+// RUN: %clang -target aarch64 -march=armv9.6a+lsui -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSUI %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+lsui -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSUI %s
+// V96A-LSUI: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+lsui"
+//
+// RUN: %clang -target aarch64 -march=armv9.6a+occmo -### -c %s 2>&1 | FileCheck -check-prefix=V96A-OCCMO %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+occmo -### -c %s 2>&1 | FileCheck -check-prefix=V96A-OCCMO %s
+// V96A-OCCMO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+occmo"
+//
+// RUN: %clang -target aarch64 -march=armv9.6a+pcdphint -### -c %s 2>&1 | FileCheck -check-prefix=V96A-PCDPHINT %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+pcdphint -### -c %s 2>&1 | FileCheck -check-prefix=V96A-PCDPHINT %s
+// V96A-PCDPHINT: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+pcdphint"
+//
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index e6247307c7219f..642df4b699ecc3 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -35,12 +35,15 @@
// CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension
// CHECK-NEXT: lse FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
// CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic instructions
+// CHECK-NEXT: lsui FEAT_LSUI Enable Armv9.6-A unprivileged load/store instructions
// CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions
// CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions
// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
// CHECK-NEXT: simd FEAT_AdvSIMD Enable Advanced SIMD instructions
+// CHECK-NEXT: occmo FEAT_OCCMO Enable Armv9.6-A Outer cacheable cache maintenance operations
// CHECK-NEXT: pauth FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension
// CHECK-NEXT: pauth-lr FEAT_PAuth_LR Enable Armv9.5-A PAC enhancements
+// CHECK-NEXT: pcdphint FEAT_PCDPHINT Enable Armv9.6-A Producer Consumer Data Placement hints
// CHECK-NEXT: pmuv3 FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension
// CHECK-NEXT: predres FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: rng FEAT_RNG Enable Random Number generation instructions
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 97671bc59f6b9e..2b391533fd2821 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -522,6 +522,14 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW",
// Armv9.6 Architecture Extensions
//===----------------------------------------------------------------------===//
+def FeatureLSUI: ExtensionWithMArch<"lsui", "LSUI", "FEAT_LSUI",
+ "Enable Armv9.6-A unprivileged load/store instructions">;
+
+def FeatureOCCMO: ExtensionWithMArch<"occmo", "OCCMO", "FEAT_OCCMO",
+ "Enable Armv9.6-A Outer cacheable cache maintenance operations">;
+
+def FeaturePCDPHINT: ExtensionWithMArch<"pcdphint", "PCDPHINT", "FEAT_PCDPHINT",
+ "Enable Armv9.6-A Producer Consumer Data Placement hints">;
//===----------------------------------------------------------------------===//
// Other Features
@@ -833,8 +841,8 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
[HasV9_4aOps, FeatureCPA],
!listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>;
def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
- [HasV9_5aOps],
- !listconcat(HasV9_5aOps.DefaultExts, [])>;
+ [HasV9_5aOps, FeatureLSUI, FeatureOCCMO],
+ !listconcat(HasV9_5aOps.DefaultExts, [FeatureLSUI, FeatureOCCMO])>;
def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
[ //v8.1
FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1d1d9b5512cfc7..5eb780d8347fc3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1729,6 +1729,11 @@ class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
let Inst{4-0} = 0b00000;
}
+class APASI : SimpleSystemI<0, (ins GPR64:$Xt), "apas", "\t$Xt">, Sched<[]> {
+ let Inst{20-5} = 0b0111001110000000;
+ let DecoderNamespace = "APAS";
+}
+
// Hint instructions that take both a CRm and a 3-bit immediate.
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity
@@ -1742,6 +1747,25 @@ let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in
let Inst{11-5} = imm;
}
+def PHintInstOperand : AsmOperandClass {
+ let Name = "PHint";
+ let ParserMethod = "tryParsePHintInstOperand";
+}
+
+def phint_op : Operand<i32> {
+ let ParserMatchClass = PHintInstOperand;
+ let PrintMethod = "printPHintOp";
+}
+
+class STSHHI
+ : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy", []>,
+ Sched<[WriteHint]> {
+ bits<3> policy;
+ let Inst{20-12} = 0b000110010;
+ let Inst{11-8} = 0b0110;
+ let Inst{7-5} = policy;
+}
+
// System instructions taking a single literal operand which encodes into
// CRm. op2 differentiates the opcodes.
def BarrierAsmOperand : AsmOperandClass {
@@ -4689,6 +4713,56 @@ multiclass StorePairNoAlloc<bits<2> opc, bit V, DAGOperand regtype,
GPR64sp:$Rn, 0)>;
}
+// armv9.6-a load/store no-allocate pair FEAT_LSUI (no-allocate)
+
+class BaseLoadStorePairNoAllocLSUI<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = V;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+multiclass LoadPairNoAllocLSUI<bits<2> opc, bit V, DAGOperand regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStorePairNoAllocLSUI<opc, V, 1,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi]>;
+
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
+
+multiclass StorePairNoAllocLSUI<bits<2> opc, bit V, DAGOperand regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in
+ def i : BaseLoadStorePairNoAllocLSUI<opc, V, 0, (outs),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
+
//---
// Load/store exclusive
//---
@@ -4769,6 +4843,109 @@ class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
}
+// Armv9.6-a load-store exclusive instructions
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+class BaseLoadStoreExclusiveLSUI<bits<2> sz, bit L, bit o0,
+ dag oops, dag iops, string asm, string operands>
+ : I<oops, iops, asm, operands, "", []> {
+ let Inst{31-30} = sz;
+ let Inst{29-23} = 0b0010010;
+ let Inst{22} = L;
+ let Inst{15} = o0;
+}
+
+
+// Neither Rs nor Rt2 operands.
+
+class LoadExclusiveLSUI<bits<2> sz, bit L, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusiveLSUI<sz, L, o0, (outs regtype:$Rt),
+ (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteLD]>
+{
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+ class StoreExclusiveLSUI<bits<2> sz, bit L, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusiveLSUI<sz, L, o0, (outs GPR32:$Ws),
+ (ins regtype:$Rt, GPR64sp0:$Rn),
+ asm, "\t$Ws, $Rt, [$Rn]">,
+ Sched<[WriteSTX]> {
+ bits<5> Ws;
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{20-16} = Ws;
+ let Inst{15} = o0;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let Constraints = "@earlyclobber $Ws";
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+ }
+
+// Armv9.6-a load-store unprivileged instructions
+class BaseLoadUnprivilegedLSUI<bits<2> sz, dag oops, dag iops, string asm>
+ : I<oops, iops, asm, "\t$Rt, [$Rn]", "", []> {
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{31-30} = sz;
+ let Inst{29-23} = 0b0010010;
+ let Inst{22} = 0b1;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{15} = 0b0;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass LoadUnprivilegedLSUI<bits<2> sz, RegisterClass regtype, string asm> {
+ def i : BaseLoadUnprivilegedLSUI<sz, (outs regtype:$Rt),
+ (ins GPR64sp0:$Rn), asm>,
+ Sched<[WriteLD]>;
+
+}
+
+class BaseStoreUnprivilegedLSUI<bits<2> sz, dag oops, dag iops, string asm>
+ : I<oops, iops, asm, "\t$Ws, $Rt, [$Rn]", "", []> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Ws;
+ let Inst{31-30} = sz;
+ let Inst{29-23} = 0b0010010;
+ let Inst{22} = 0b0;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Ws;
+ let Inst{15} = 0b0;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass StoreUnprivilegedLSUI<bits<2> sz, RegisterClass regtype, string asm> {
+ def i : BaseStoreUnprivilegedLSUI<sz, (outs GPR32: $Ws),
+ (ins regtype:$Rt, GPR64sp0:$Rn),
+ asm>,
+ Sched<[WriteSTX]>;
+}
+
// Simple store release operations do not check the exclusive monitor.
let mayLoad = 0, mayStore = 1 in
class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0,
@@ -11845,6 +12022,48 @@ multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
def X : BaseCASP<order, "", XSeqPairClassOperand>;
}
+// v9.6-a CAST unprivileged instructions
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+class BaseCASTEncoding<dag oops, dag iops, string asm,
+ string cstr, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rs, $Rt, [$Rn]", cstr, pattern> {
+ bits<5> Rs;
+ bits<5> Rn;
+ bits<5> Rt;
+ bit L;
+ bit o0;
+ bits<2> Sz;
+ let Inst{31-30} = Sz;
+ let Inst{29-23} = 0b0010011;
+ let Inst{22} = L;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rs;
+ let Inst{15} = o0;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass CompareAndSwapUnprivileged<bits<2> Sz, bit L, bit o0, string order> {
+ let Sz = Sz, L = L, o0 = o0 in
+ def X : BaseCASTEncoding <
+ (outs GPR64:$out),
+ (ins GPR64:$Rs, GPR64:$Rt, GPR64sp0:$Rn),
+ "cas" # order # "t",
+ "$out = $Rs",[]>, Sched<[WriteAtomic]>;
+
+}
+
+multiclass CompareAndSwapPairUnprivileged<bits<2> Sz, bit L, bit o0, string order> {
+ let Sz = Sz, L = L, o0 = o0 in
+ def X : BaseCASTEncoding<(outs XSeqPairClassOperand:$out),
+ (ins XSeqPairClassOperand:$Rs, XSeqPairClassOperand:$Rt, GPR64sp0:$Rn),
+ "casp" # order # "t",
+ "$out = $Rs",[]>,
+ Sched<[WriteAtomic]>;
+}
+
let Predicates = [HasLSE] in
class BaseSWP<string order, string size, RegisterClass RC>
: I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
@@ -11878,6 +12097,35 @@ multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP<order, "", GPR64>;
}
+// v9.6a swap operations
+class BaseSWPLSUI<string order, RegisterClass RC>
+ : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swpt" # order,
+ "\t$Rs, $Rt, [$Rn]","",[]>,
+ Sched<[WriteAtomic]> {
+ bits<2> Sz;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b011001;
+ let Inst{23} = Acq;
+ let Inst{22} = Rel;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = 0b1;
+ let Inst{14-12} = 0b000;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass SwapLSUI<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def W : BaseSWPLSUI<order, GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def X : BaseSWPLSUI<order, GPR64>;
+}
+
let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
: I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
@@ -11916,6 +12164,39 @@ multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
def X : BaseLDOPregister<op, order, "", GPR64>;
}
+class BaseLDOPregisterLSUI<string op, string order, RegisterClass RC>
+ : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ldt" # op # order,
+ "\t$Rs, $Rt, [$Rn]","",[]>,
+ Sched<[WriteAtomic]> {
+ bits<2> Sz;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<3> opc;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b011001;
+ let Inst{23} = Acq;
+ let Inst{22} = Rel;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+
+multiclass LDOPregisterLSUI<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
+ string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in
+ def W : BaseLDOPregisterLSUI<op, order, GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in
+ def X : BaseLDOPregisterLSUI<op, order, GPR64>;
+}
+
// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more
// complex DAG for DstRHS.
let Predicates = [HasLSE] in
@@ -12020,6 +12301,29 @@ multiclass STOPregister<string asm, string instr> {
!cast<Instruction>(instr # "X")>;
}
+class BaseSTOPregisterLSUI<string asm, RegisterClass OP, Register Reg,
+ Instruction inst> :
+ InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
+
+multiclass STOPregisterLSUI<string asm, string instr> {
+ def : BaseSTOPregisterLSUI<asm # "a", GPR32, WZR,
+ !cast<Instruction>(instr # "W")>;
+ def : BaseSTOPregisterLSUI<asm # "a", GPR64, XZR,
+ !cast<Instruction>(instr # "X")>;
+ def : BaseSTOPregisterLSUI<asm # "l", GPR32, WZR,
+ !cast<Instruction>(instr # "W")>;
+ def : BaseSTOPregisterLSUI<asm # "l", GPR64, XZR,
+ !cast<Instruction>(instr # "X")>;
+ def : BaseSTOPregisterLSUI<asm # "al", GPR32, WZR,
+ !cast<Instruction>(instr # "W")>;
+ def : BaseSTOPregisterLSUI<asm # "al", GPR64, XZR,
+ !cast<Instruction>(instr # "X")>;
+ def : BaseSTOPregisterLSUI<asm, GPR32, WZR,
+ !cast<Instruction>(instr # "W")>;
+ def : BaseSTOPregisterLSUI<asm, GPR64, XZR,
+ !cast<Instruction>(instr # "X")>;
+}
+
class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops,
dag iops, dag oops, list<dag> pat>
: I<oops, iops, asm_inst, asm_ops, "", pat>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 325508b62a9f14..99e586d9953ece 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -213,6 +213,13 @@ def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8
def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
+def HasPCDPHINT : Predicate<"Subtarget->hasPCDPHINT()">,
+ AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
+def HasLSUI : Predicate<"Subtarget->hasLSUI()">,
+ AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
+def HasOCCMO : Predicate<"Subtarget->hasOCCMO()">,
+ AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;
+
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
def HasSVEorSME
@@ -1231,6 +1238,11 @@ def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh", (HINT 0b110)>;
def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
+
+let Predicates = [HasPCDPHINT] in {
+ def STSHH: STSHHI;
+}
+
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Indentification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
@@ -2568,12 +2580,59 @@ defm CASPA : CompareAndSwapPair<1, 0, "a">;
defm CASPL : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;
+// v9.6-a atomic CAST
+let Predicates = [HasLSUI] in {
+defm CAST : CompareAndSwapUnprivileged<0b11, 0, 0, "">;
+defm CASLT : CompareAndSwapUnprivileged<0b11, 0, 1, "l">;
+defm CASAT : CompareAndSwapUnprivileged<0b11, 1, 0, "a">;
+defm CASALT : CompareAndSwapUnprivileged<0b11, 1, 1, "al">;
+
+// v9.6-a atomic CASPT
+defm CASPT : CompareAndSwapPairUnprivileged<0b01, 0...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
It might be worth splitting each feature into its own commit rather than one big commit, it makes the review easier. Currently it's difficult to determine which section belongs to which feature. |
Hmm, yeah possible. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the work @nasherm .
I left some comments. I am not sure if all of them proceed.
Carol
Thank you for the review. I'm not sure about the instruction alias suggestions. I would've thought that although these instructions are equivalent the different execution conditions mean they don't alias. Am I wrong in my thinking? |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank @nasherm,
I still have one question about the DC alias. But I think the patch is good.
Add support for the following Armv9.6-A memory systems extensions: FEAT_LSUI - Unprivileged Load Store FEAT_OCCMO - Outer Cacheable Cache Maintenance Operation FEAT_PCDPHINT - Producer-Consumer Data Placement Hints FEAT_SRMASK - Bitwise System Register Write Masks as documented here: https://developer.arm.com/documentation/109697/2024_09/Feature-descriptions/The-Armv9-6-architecture-extension Co-authored-by: Jonathan Thackray <[email protected]> Change-Id: Icbbdd194b8885d8a1b9d513faf0b85e74011201d
Change-Id: I2760c0cf55b3fd13240105c536e37f618209ed2e
Fix `clang-format`
@CarolineConcatto I hope I answered your question. If so I'm hoping to merge this by EOD if there's no more to do |
@@ -53,3 +53,16 @@ | |||
// RUN: %clang -target aarch64 -march=armv9.6a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s | |||
// RUN: %clang -target aarch64 -march=armv9.6-a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s | |||
// V96A-SVE-F16F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-f16f32mm" | |||
// | |||
// RUN: %clang -target aarch64 -march=armv9.6a+lsui -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSUI %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: -target has been deprecated around clang 3.4. use --target= for new tests
@@ -0,0 +1,324 @@ | |||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update_mca_test_checks.py is only for llvm-mca. Seems wrong usage?
There is now update_mc_test_checks.py . You can also do %extract-encodings
like AMDGPU/gfx12_asm_vop1-fake16.s
For new tests, if [0xe9,0x7f,0x5f,0xc9]
looks cumbersome, you can use [e97f5fc9]
with the new --hex option.
In PR llvm#112341, the `APAS` instruction was added as part of the Armv9.6-A specification, but it didn't take the Xt register parameter. This change fixes this.
In PR #112341, the `APAS` instruction was added as part of the Armv9.6-A specification, but it didn't take the Xt register parameter. This change fixes this.
Add support for the following Armv9.6-A memory systems extensions:
FEAT_LSUI - Unprivileged Load Store
FEAT_OCCMO - Outer Cacheable Cache Maintenance Operation
FEAT_PCDPHINT - Producer-Consumer Data Placement Hints
FEAT_SRMASK - Bitwise System Register Write Masks
as documented here:
https://developer.arm.com/documentation/109697/2024_09/Feature-descriptions/The-Armv9-6-architecture-extension
Co-authored-by: Jonathan Thackray [email protected]