Skip to content

Commit 67ff5ba

Browse files
authored
[AArch64] Add assembly/disassembly of atomic ld/st (#112892)
This patch adds assembly/disassembly for the following instructions: ldfadd{a,al,l,}, ldbfadd{a,al,l,}, ldfmax{a,al,l,}, ldbfmax{a,al,l,}, ldfmaxnm{a,al,l,}, ldbfmaxnm{a,al,l,}, ldfmin{a,al,l,}, ldbfmin{a,al,l,}, ldfminnm{a,al,l,}, ldbfminnm{a,al,l,}, stfadd{l,}, stbfadd{l,}, stfmax{l,}, stbfmax{l,}, stfmaxnm{l,}, stbfmaxnm{l,}, stfmin{l,}, stbfmin{l,}, stfminnm{l,}, stbfminnm{l,}, according to [1]. [1] https://developer.arm.com/documentation/ddi0602 Co-authored-by: Spencer Abson [[email protected]](mailto:[email protected]) Co-authored-by: Caroline Concatto [[email protected]](mailto:[email protected])
1 parent 1dfdbf7 commit 67ff5ba

28 files changed

+3469
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12626,3 +12626,64 @@ def : TokenAlias<".H", ".h">;
1262612626
def : TokenAlias<".S", ".s">;
1262712627
def : TokenAlias<".D", ".d">;
1262812628
def : TokenAlias<".Q", ".q">;
12629+
12630+
//----------------------------------------------------------------------------
12631+
// 2024 Armv9.6 Extensions
12632+
//----------------------------------------------------------------------------
12633+
12634+
// Common encoding for the FEAT_LSFE floating-point atomic load forms
// (ld{b}fadd, ld{b}fmax, ld{b}fmin, ld{b}fmaxnm, ld{b}fminnm and their
// a/al/l variants). $Rs and the base address $Rn are inputs, $Rt is the
// only output, and the instruction is flagged as both a load and a store.
//   regtype - register class used for both $Rs and $Rt.
//   sz      - placed in Inst{31-30}; the instantiations in this patch pass
//             0b11/0b10/0b01 for FPR64/FPR32/FPR16 and 0b00 for BFloat16.
//   AR      - placed in Inst{23-22}; 0b00, 0b01, 0b10 and 0b11 are passed
//             for the unsuffixed, "l", "a" and "al" mnemonics respectively.
//   op0     - placed in Inst{14-12}; the instantiations use 0b000 (add),
//             0b100 (max), 0b101 (min), 0b110 (maxnm) and 0b111 (minnm).
//   asm     - mnemonic; operands are printed as "$Rs, $Rt, [$Rn]".
let mayLoad = 1, mayStore = 1 in
12635+
class BaseAtomicFPLoad<RegisterClass regtype, bits<2> sz, bits<2> AR,
12636+
bits<3> op0, string asm>
12637+
: I<(outs regtype:$Rt),
12638+
(ins regtype:$Rs, GPR64sp:$Rn),
12639+
asm, "\t$Rs, $Rt, [$Rn]","", []>,
12640+
// Empty scheduling list: no scheduling information is attached here.
Sched<[]> {
12641+
// 5-bit register numbers for the three operands.
bits<5> Rt;
12642+
bits<5> Rs;
12643+
bits<5> Rn;
12644+
let Inst{31-30} = sz;
12645+
let Inst{29-24} = 0b111100;
12646+
let Inst{23-22} = AR;
12647+
let Inst{21} = 0b1;
12648+
let Inst{20-16} = Rs;
12649+
// Bit 15 is 0 here; BaseAtomicFPStore sets it to 1.
let Inst{15} = 0b0;
12650+
let Inst{14-12} = op0;
12651+
let Inst{11-10} = 0b00;
12652+
let Inst{9-5} = Rn;
12653+
let Inst{4-0} = Rt;
12654+
}
12655+
12656+
// Instantiates the double (D), single (S) and half (H) precision variants of
// an atomic FP load for one mnemonic. Each variant pairs a register class
// with its sz value: FPR64/0b11, FPR32/0b10, FPR16/0b01. AR, op0 and asm are
// forwarded unchanged to BaseAtomicFPLoad.
multiclass AtomicFPLoad<bits<2> AR, bits<3> op0, string asm> {
12657+
def D : BaseAtomicFPLoad<FPR64, 0b11, AR, op0, asm>;
12658+
def S : BaseAtomicFPLoad<FPR32, 0b10, AR, op0, asm>;
12659+
def H : BaseAtomicFPLoad<FPR16, 0b01, AR, op0, asm>;
12660+
}
12661+
12662+
// Common encoding for the FEAT_LSFE floating-point atomic store forms
// (st{b}fadd, st{b}fmax, st{b}fmin, st{b}fmaxnm, st{b}fminnm and their "l"
// variants). There is no output operand; $Rs and the base address $Rn are
// inputs, and the instruction is flagged as both a load and a store.
//   regtype - register class of $Rs.
//   sz      - placed in Inst{31-30}; the instantiations in this patch pass
//             0b11/0b10/0b01 for FPR64/FPR32/FPR16 and 0b00 for BFloat16.
//   R       - placed in Inst{22}; 0 for the unsuffixed mnemonic, 1 for "l".
//   op0     - placed in Inst{14-12}; the instantiations use 0b000 (add),
//             0b100 (max), 0b101 (min), 0b110 (maxnm) and 0b111 (minnm).
//   asm     - mnemonic; operands are printed as "$Rs, [$Rn]".
let mayLoad = 1, mayStore = 1 in
12663+
class BaseAtomicFPStore<RegisterClass regtype, bits<2> sz, bit R,
12664+
bits<3> op0, string asm>
12665+
: I<(outs),
12666+
(ins regtype:$Rs, GPR64sp:$Rn),
12667+
asm, "\t$Rs, [$Rn]",
12668+
"", []>,
12669+
// Empty scheduling list: no scheduling information is attached here.
Sched<[]> {
12670+
// NOTE(review): Rt is declared but this class has no $Rt operand and never
// assigns it to any Inst bits (Inst{4-0} is a constant below). It looks like
// a leftover from BaseAtomicFPLoad - confirm nothing overrides it, then drop.
bits<5> Rt;
12671+
bits<5> Rs;
12672+
bits<5> Rn;
12673+
let Inst{31-30} = sz;
12674+
let Inst{29-23} = 0b1111000;
12675+
let Inst{22} = R;
12676+
let Inst{21} = 0b1;
12677+
let Inst{20-16} = Rs;
12678+
// Bit 15 is 1 here; BaseAtomicFPLoad sets it to 0.
let Inst{15} = 0b1;
12679+
let Inst{14-12} = op0;
12680+
let Inst{11-10} = 0b00;
12681+
let Inst{9-5} = Rn;
12682+
// The low five bits (where the load form encodes Rt) are fixed to all ones.
let Inst{4-0} = 0b11111;
12683+
}
12684+
12685+
// Instantiates the double (D), single (S) and half (H) precision variants of
// an atomic FP store for one mnemonic. Each variant pairs a register class
// with its sz value: FPR64/0b11, FPR32/0b10, FPR16/0b01. R, op0 and asm are
// forwarded unchanged to BaseAtomicFPStore.
multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
12686+
def D : BaseAtomicFPStore<FPR64, 0b11, R, op0, asm>;
12687+
def S : BaseAtomicFPStore<FPR32, 0b10, R, op0, asm>;
12688+
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
12689+
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10299,6 +10299,78 @@ defm : PromoteBinaryv8f16Tov4f32<any_fdiv, FDIVv4f32>;
1029910299
defm : PromoteBinaryv8f16Tov4f32<any_fmul, FMULv4f32>;
1030010300
defm : PromoteBinaryv8f16Tov4f32<any_fsub, FSUBv4f32>;
1030110301

10302+
//===-----------------------------------------------------===//
10303+
// Atomic floating-point in-memory instructions (FEAT_LSFE)
10304+
//===-----------------------------------------------------===//
10305+
10306+
// All records below are gated on the HasLSFE predicate.
// Layout of the arguments:
//   AtomicFPLoad<AR, op0, asm>   - AR is 0b00/0b01/0b10/0b11 for the
//                                  unsuffixed/"l"/"a"/"al" mnemonic.
//   AtomicFPStore<R, op0, asm>   - R is 0b0 for unsuffixed, 0b1 for "l".
//   op0 is 0b000 add, 0b100 max, 0b101 min, 0b110 maxnm, 0b111 minnm.
// The BFloat16 forms instantiate the base classes directly with FPR16 and
// sz = 0b00 instead of going through the D/S/H multiclasses.
let Predicates = [HasLSFE] in {
10307+
// Floating-point Atomic Load
10308+
defm LDFADDA : AtomicFPLoad<0b10, 0b000, "ldfadda">;
10309+
defm LDFADDAL : AtomicFPLoad<0b11, 0b000, "ldfaddal">;
10310+
defm LDFADD : AtomicFPLoad<0b00, 0b000, "ldfadd">;
10311+
defm LDFADDL : AtomicFPLoad<0b01, 0b000, "ldfaddl">;
10312+
defm LDFMAXA : AtomicFPLoad<0b10, 0b100, "ldfmaxa">;
10313+
defm LDFMAXAL : AtomicFPLoad<0b11, 0b100, "ldfmaxal">;
10314+
defm LDFMAX : AtomicFPLoad<0b00, 0b100, "ldfmax">;
10315+
defm LDFMAXL : AtomicFPLoad<0b01, 0b100, "ldfmaxl">;
10316+
defm LDFMINA : AtomicFPLoad<0b10, 0b101, "ldfmina">;
10317+
defm LDFMINAL : AtomicFPLoad<0b11, 0b101, "ldfminal">;
10318+
defm LDFMIN : AtomicFPLoad<0b00, 0b101, "ldfmin">;
10319+
defm LDFMINL : AtomicFPLoad<0b01, 0b101, "ldfminl">;
10320+
defm LDFMAXNMA : AtomicFPLoad<0b10, 0b110, "ldfmaxnma">;
10321+
defm LDFMAXNMAL : AtomicFPLoad<0b11, 0b110, "ldfmaxnmal">;
10322+
defm LDFMAXNM : AtomicFPLoad<0b00, 0b110, "ldfmaxnm">;
10323+
defm LDFMAXNML : AtomicFPLoad<0b01, 0b110, "ldfmaxnml">;
10324+
defm LDFMINNMA : AtomicFPLoad<0b10, 0b111, "ldfminnma">;
10325+
defm LDFMINNMAL : AtomicFPLoad<0b11, 0b111, "ldfminnmal">;
10326+
// NOTE(review): the record name below is spelled LDFMINMN while its mnemonic
// is "ldfminnm" and every sibling uses the LDFMINNM* spelling (compare
// LDBFMINNM and STFMINNM). This looks like a transposition typo; renaming to
// LDFMINNM changes the generated AArch64::LDFMINMN{D,S,H} opcode names, so
// confirm there are no other users before fixing.
defm LDFMINMN : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
10327+
defm LDFMINNML : AtomicFPLoad<0b01, 0b111, "ldfminnml">;
10328+
// BFloat16
10329+
def LDBFADDA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b000, "ldbfadda">;
10330+
def LDBFADDAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b000, "ldbfaddal">;
10331+
def LDBFADD : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b000, "ldbfadd">;
10332+
def LDBFADDL : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b000, "ldbfaddl">;
10333+
def LDBFMAXA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b100, "ldbfmaxa">;
10334+
def LDBFMAXAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b100, "ldbfmaxal">;
10335+
def LDBFMAX : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b100, "ldbfmax">;
10336+
def LDBFMAXL : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b100, "ldbfmaxl">;
10337+
def LDBFMINA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b101, "ldbfmina">;
10338+
def LDBFMINAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b101, "ldbfminal">;
10339+
def LDBFMIN : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b101, "ldbfmin">;
10340+
def LDBFMINL : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b101, "ldbfminl">;
10341+
def LDBFMAXNMA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b110, "ldbfmaxnma">;
10342+
def LDBFMAXNMAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b110, "ldbfmaxnmal">;
10343+
def LDBFMAXNM : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b110, "ldbfmaxnm">;
10344+
def LDBFMAXNML : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b110, "ldbfmaxnml">;
10345+
def LDBFMINNMA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b111, "ldbfminnma">;
10346+
def LDBFMINNMAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b111, "ldbfminnmal">;
10347+
def LDBFMINNM : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b111, "ldbfminnm">;
10348+
def LDBFMINNML : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b111, "ldbfminnml">;
10349+
10350+
// Floating-point Atomic Store
10351+
defm STFADD : AtomicFPStore<0b0, 0b000, "stfadd">;
10352+
defm STFADDL : AtomicFPStore<0b1, 0b000, "stfaddl">;
10353+
defm STFMAX : AtomicFPStore<0b0, 0b100, "stfmax">;
10354+
defm STFMAXL : AtomicFPStore<0b1, 0b100, "stfmaxl">;
10355+
defm STFMIN : AtomicFPStore<0b0, 0b101, "stfmin">;
10356+
defm STFMINL : AtomicFPStore<0b1, 0b101, "stfminl">;
10357+
defm STFMAXNM : AtomicFPStore<0b0, 0b110, "stfmaxnm">;
10358+
defm STFMAXNML : AtomicFPStore<0b1, 0b110, "stfmaxnml">;
10359+
defm STFMINNM : AtomicFPStore<0b0, 0b111, "stfminnm">;
10360+
defm STFMINNML : AtomicFPStore<0b1, 0b111, "stfminnml">;
10361+
// BFloat16
10362+
def STBFADD : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b000, "stbfadd">;
10363+
def STBFADDL : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b000, "stbfaddl">;
10364+
def STBFMAX : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b100, "stbfmax">;
10365+
def STBFMAXL : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b100, "stbfmaxl">;
10366+
def STBFMIN : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b101, "stbfmin">;
10367+
def STBFMINL : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b101, "stbfminl">;
10368+
def STBFMAXNM : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b110, "stbfmaxnm">;
10369+
def STBFMAXNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b110, "stbfmaxnml">;
10370+
def STBFMINNM : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b111, "stbfminnm">;
10371+
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
10372+
}
10373+
1030210374
include "AArch64InstrAtomics.td"
1030310375
include "AArch64SVEInstrInfo.td"
1030410376
include "AArch64SMEInstrInfo.td"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
// Negative test for the .arch directive: lsfe is enabled and then disabled
// again, so the ldfadd below is expected to be rejected. The echoed source
// line must directly follow the diagnostic, matching the sibling
// .arch_extension and .cpu negative tests.
2+
3+
.arch armv9.6-a+lsfe
4+
.arch armv9.6-a+nolsfe
5+
ldfadd h0, h1, [x2]
6+
// CHECK: error: instruction requires: lsfe
7+
// CHECK-NEXT: ldfadd h0, h1, [x2]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
// Positive test for the .arch directive: with lsfe enabled, the ldfadd below
// is expected to appear in the assembler output.
2+
3+
.arch armv9.6-a+lsfe
4+
ldfadd h0, h1, [x2]
5+
// CHECK: ldfadd h0, h1, [x2]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
// Negative test for the .arch_extension directive: lsfe is enabled and then
// disabled again, so the ldfadd below is expected to be rejected with the
// diagnostic followed immediately by the echoed source line.
2+
3+
.arch_extension lsfe
4+
.arch_extension nolsfe
5+
ldfadd h0, h1, [x2]
6+
// CHECK: error: instruction requires: lsfe
7+
// CHECK-NEXT: ldfadd h0, h1, [x2]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
// Positive test for the .arch_extension directive: with lsfe enabled, the
// ldfadd below is expected to appear in the assembler output.
2+
3+
.arch_extension lsfe
4+
ldfadd h0, h1, [x2]
5+
// CHECK: ldfadd h0, h1, [x2]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
// Negative test for the .cpu directive: lsfe is enabled and then disabled
// again, so the ldfadd below is expected to be rejected with the diagnostic
// followed immediately by the echoed source line.
2+
3+
.cpu generic+lsfe
4+
.cpu generic+nolsfe
5+
ldfadd h0, h1, [x2]
6+
// CHECK: error: instruction requires: lsfe
7+
// CHECK-NEXT: ldfadd h0, h1, [x2]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
// Positive test for the .cpu directive: with lsfe enabled, the ldfadd below
// is expected to appear in the assembler output.
2+
3+
.cpu generic+lsfe
4+
ldfadd h0, h1, [x2]
5+
// CHECK: ldfadd h0, h1, [x2]

0 commit comments

Comments
 (0)