Skip to content

Commit 95c5042

Browse files
[AArch64] Add assembly/disassembly for {S,SU,US,U}MOP4{A,S} instructions (llvm#113349)
The new instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions

Co-Authored-By: Marian Lukac <[email protected]>
1 parent 73fbae8 commit 95c5042

30 files changed

+2438
-0
lines changed

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,33 @@ defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme
131131
defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
132132
}
133133

134+
// Quarter-tile outer product instructions ({S,SU,US,U}MOP4{A,S}) that target
// 32-bit ZA tiles. Everything in this block is gated on HasSME2p2.
// Each defm instantiates a multiclass from SMEInstrFormats.td that emits four
// records, one per combination of single-register / two-register operands.
let Predicates = [HasSME2p2] in {
135+
// i8 -> i32 forms. Template arguments are <zn_u, zm_u, subtr, mnemonic>.
// Judging by the mnemonics, zn_u / zm_u are 0 for the "s" side and 1 for the
// "u" side of the name (presumably signed/unsigned), and subtr is 1 for the
// "...4s" mnemonics and 0 for the "...4a" mnemonics.
defm SMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b0, "smop4a">;
136+
defm SMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b1, "smop4s">;
137+
defm SUMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b0, "sumop4a">;
138+
defm SUMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b1, "sumop4s">;
139+
defm USMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b0, "usmop4a">;
140+
defm USMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b1, "usmop4s">;
141+
defm UMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b0, "umop4a">;
142+
defm UMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b1, "umop4s">;
143+
144+
// i16 -> i32 forms. Template arguments are <unsigned, subtr, mnemonic>; only
// the SMOP4 and UMOP4 mnemonics are defined for this element size here.
// The defm names repeat SMOP4A/SMOP4S/UMOP4A/UMOP4S from above; the generated
// record names stay distinct because this multiclass appends *_HToS suffixes
// while the i8 multiclass appends *_BToS.
defm SMOP4A : sme_quarter_outer_product_i16_i32<0b0, 0b0, "smop4a">;
145+
defm SMOP4S : sme_quarter_outer_product_i16_i32<0b0, 0b1, "smop4s">;
146+
defm UMOP4A : sme_quarter_outer_product_i16_i32<0b1, 0b0, "umop4a">;
147+
defm UMOP4S : sme_quarter_outer_product_i16_i32<0b1, 0b1, "umop4s">;
148+
}
149+
150+
// Quarter-tile outer product instructions that target 64-bit ZA tiles.
// These require both HasSME2p2 and HasSMEI16I64.
// Template arguments are <zn_u, zm_u, subtr, mnemonic>, with the same bit
// assignments per mnemonic as the i8 -> i32 defms (e.g. "sumop4a" is <0, 1, 0>).
// Each defm emits four *_HtoD records (see sme_quarter_outer_product_i64 in
// SMEInstrFormats.td).
let Predicates = [HasSME2p2, HasSMEI16I64] in {
151+
defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a">;
152+
defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s">;
153+
defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a">;
154+
defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s">;
155+
defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a">;
156+
defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s">;
157+
defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a">;
158+
defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s">;
159+
}
160+
134161
let Predicates = [HasSME] in {
135162
//===----------------------------------------------------------------------===//
136163
// Loads and stores

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,116 @@ multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator
433433
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
434434
}
435435

436+
// Encoding class for quarter-tile outer product instructions that use a
// 64-bit ZA tile (TileOp64) as both source and destination.
//
// Template parameters:
//   zn_u_pair - two bits {u0, N}: the high bit goes to Inst{24}, the low bit
//               to Inst{9}.
//   zm_u_pair - two bits {u1, M}: the high bit goes to Inst{21}, the low bit
//               to Inst{20}.
//   subtr     - placed in Inst{4}; the multiclass users pass 1 for the
//               "...4s" mnemonics and 0 for the "...4a" mnemonics.
//   zn_ty     - register operand class for $Zn.
//   zm_ty     - register operand class for $Zm.
//   mnemonic  - assembly mnemonic string.
//
// In the multiclass of the same name the low bit of each pair (N / M) is 1
// exactly when the corresponding operand class is a ZZ_*_mul_r_* register
// list and 0 when it is a single ZPR*Mul2_* register.
class sme_quarter_outer_product_i64<bits<2> zn_u_pair, bits<2> zm_u_pair, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
437+
: I<(outs TileOp64:$ZAda),
438+
(ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
439+
mnemonic, "\t$ZAda, $Zn, $Zm",
440+
"", []>,
441+
Sched<[]> {
442+
// Operand encodings: a 3-bit tile index and 3-bit Zn / Zm fields.
bits<3> ZAda;
443+
bits<3> Zn;
444+
bits<3> Zm;
445+
let Inst{31-25} = 0b1010000;
446+
let Inst{24} = zn_u_pair{1}; // u0
447+
let Inst{23-22} = 0b11;
448+
let Inst{21} = zm_u_pair{1}; // u1
449+
let Inst{20} = zm_u_pair{0}; // M
450+
let Inst{19-17} = Zm;
451+
let Inst{16-10} = 0b0000000;
452+
let Inst{9} = zn_u_pair{0}; // N
453+
let Inst{8-6} = Zn;
454+
let Inst{5} = 0;
455+
let Inst{4} = subtr;
456+
let Inst{3} = 0b1;
457+
let Inst{2-0} = ZAda;
458+
459+
// Tie the output tile operand to the input tile operand.
let Constraints = "$ZAda = $_ZAda";
460+
}
461+
462+
// Encoding class for quarter-tile outer product instructions that use a
// 32-bit ZA tile (TileOp32) as both source and destination; instantiated by
// the multiclass of the same name with 8-bit-element operand classes.
//
// Template parameters:
//   zn_u_pair - two bits {u0, N}: the high bit goes to Inst{24}, the low bit
//               to Inst{9}.
//   zm_u_pair - two bits {u1, M}: the high bit goes to Inst{21}, the low bit
//               to Inst{20}.
//   subtr     - placed in Inst{4}; the multiclass users pass 1 for the
//               "...4s" mnemonics and 0 for the "...4a" mnemonics.
//   zn_ty     - register operand class for $Zn.
//   zm_ty     - register operand class for $Zm.
//   mnemonic  - assembly mnemonic string.
//
// The fixed fields differ from the i16 -> i32 class only in Inst{3-2}
// (0b00 here, 0b10 there) and in Inst{21}, which carries u1 here.
class sme_quarter_outer_product_i8_i32<bits<2> zn_u_pair, bits<2> zm_u_pair, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
463+
: I<(outs TileOp32:$ZAda),
464+
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
465+
mnemonic, "\t$ZAda, $Zn, $Zm",
466+
"", []>,
467+
Sched<[]> {
468+
// Operand encodings: a 2-bit tile index and 3-bit Zn / Zm fields.
bits<2> ZAda;
469+
bits<3> Zn;
470+
bits<3> Zm;
471+
let Inst{31-25} = 0b1000000;
472+
let Inst{24} = zn_u_pair{1}; // u0
473+
let Inst{23-22} = 0b00;
474+
let Inst{21} = zm_u_pair{1}; // u1
475+
let Inst{20} = zm_u_pair{0}; // M
476+
let Inst{19-17} = Zm;
477+
let Inst{16-10} = 0b0100000;
478+
let Inst{9} = zn_u_pair{0}; // N
479+
let Inst{8-6} = Zn;
480+
let Inst{5} = 0;
481+
let Inst{4} = subtr;
482+
let Inst{3-2} = 0b00;
483+
let Inst{1-0} = ZAda;
484+
485+
// Tie the output tile operand to the input tile operand.
let Constraints = "$ZAda = $_ZAda";
486+
}
487+
488+
// Encoding class for quarter-tile outer product instructions that use a
// 32-bit ZA tile (TileOp32) as both source and destination; instantiated by
// the multiclass of the same name with 16-bit-element operand classes.
//
// Unlike the i8 -> i32 and i64 classes, this one takes individual bits rather
// than {u, N} / {u, M} pairs, and has no u1 bit: Inst{21} is part of the fixed
// 0b000 field in Inst{23-21}.
//
// Template parameters:
//   u0       - placed in Inst{24}.
//   N        - placed in Inst{9}; the multiclass passes 1 when $Zn uses a
//              ZZ_h_mul_r_* register-list class.
//   M        - placed in Inst{20}; the multiclass passes 1 when $Zm uses a
//              ZZ_h_mul_r_* register-list class.
//   subtr    - placed in Inst{4}.
//   zn_ty    - register operand class for $Zn.
//   zm_ty    - register operand class for $Zm.
//   mnemonic - assembly mnemonic string.
class sme_quarter_outer_product_i16_i32<bit u0, bit N, bit M, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
489+
: I<(outs TileOp32:$ZAda),
490+
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
491+
mnemonic, "\t$ZAda, $Zn, $Zm",
492+
"", []>,
493+
Sched<[]> {
494+
// Operand encodings: a 2-bit tile index and 3-bit Zn / Zm fields.
bits<2> ZAda;
495+
bits<3> Zn;
496+
bits<3> Zm;
497+
let Inst{31-25} = 0b1000000;
498+
let Inst{24} = u0;
499+
let Inst{23-21} = 0b000;
500+
let Inst{20} = M;
501+
let Inst{19-17} = Zm;
502+
let Inst{16-10} = 0b0100000;
503+
let Inst{9} = N;
504+
let Inst{8-6} = Zn;
505+
let Inst{5} = 0;
506+
let Inst{4} = subtr;
507+
let Inst{3-2} = 0b10;
508+
let Inst{1-0} = ZAda;
509+
510+
// Tie the output tile operand to the input tile operand.
let Constraints = "$ZAda = $_ZAda";
511+
}
512+
513+
// Emits the four operand-form variants of one i8 -> i32 quarter-tile outer
// product instruction. zn_u and zm_u become the high bit of the class's
// zn_u_pair / zm_u_pair; the low bit is set per variant to match the operand
// class used:
//   _MZZ_BToS   - Zn single register,  Zm single register   (N=0, M=0)
//   _M2ZZ_BToS  - Zn register list,    Zm single register   (N=1, M=0)
//   _MZ2Z_BToS  - Zn single register,  Zm register list     (N=0, M=1)
//   _M2Z2Z_BToS - Zn register list,    Zm register list     (N=1, M=1)
// Zn always uses a *_Lo operand class and Zm a *_Hi operand class.
multiclass sme_quarter_outer_product_i8_i32<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
514+
def _MZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 0}, subtr,
515+
ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>;
516+
def _M2ZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr,
517+
ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>;
518+
def _MZ2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 1}, subtr,
519+
ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, mnemonic>;
520+
def _M2Z2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 1}, subtr,
521+
ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, mnemonic>;
522+
}
523+
524+
// Emits the four operand-form variants of one i16 -> i32 quarter-tile outer
// product instruction. `unsigned` is forwarded as the class's u0 bit; the
// N and M bits are set per variant to match the operand class used:
//   _MZZ_HToS   - Zn single register,  Zm single register   (N=0, M=0)
//   _M2ZZ_HToS  - Zn register list,    Zm single register   (N=1, M=0)
//   _MZ2Z_HToS  - Zn single register,  Zm register list     (N=0, M=1)
//   _M2Z2Z_HToS - Zn register list,    Zm register list     (N=1, M=1)
// Zn always uses a *_Lo operand class and Zm a *_Hi operand class.
multiclass sme_quarter_outer_product_i16_i32<bit unsigned, bit subtr, string mnemonic>{
525+
def _MZZ_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b0, subtr,
526+
ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
527+
def _M2ZZ_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b0, subtr,
528+
ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
529+
def _MZ2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b1, subtr,
530+
ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
531+
def _M2Z2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b1, subtr,
532+
ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
533+
}
534+
535+
// Emits the four operand-form variants of one quarter-tile outer product
// instruction that targets a 64-bit tile, using 16-bit-element operand
// classes. zn_u and zm_u become the high bit of the class's zn_u_pair /
// zm_u_pair; the low bit is set per variant to match the operand class used:
//   _MZZ_HtoD   - Zn single register,  Zm single register   (N=0, M=0)
//   _M2ZZ_HtoD  - Zn register list,    Zm single register   (N=1, M=0)
//   _MZ2Z_HtoD  - Zn single register,  Zm register list     (N=0, M=1)
//   _M2Z2Z_HtoD - Zn register list,    Zm register list     (N=1, M=1)
// NOTE(review): the suffix is spelled `HtoD` here but `BToS` / `HToS` in the
// sibling multiclasses. The suffix is part of the generated record names, so
// confirm there are no other users before normalising the spelling.
multiclass sme_quarter_outer_product_i64<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
536+
def _MZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 0}, subtr,
537+
ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
538+
def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr,
539+
ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
540+
def _MZ2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 1}, subtr,
541+
ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
542+
def _M2Z2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 1}, subtr,
543+
ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
544+
}
545+
436546
//===----------------------------------------------------------------------===//
437547
// SME Add Vector to Tile
438548
//===----------------------------------------------------------------------===//
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Assembler/disassembler test for SMOP4A with .h sources and a .s tile.
// The commands below, in order: assemble with +sme2p2 and check the printed
// instruction and encoding; assemble without the feature and expect a
// diagnostic; assemble to an object and disassemble it with the feature
// enabled; disassemble the same object with the feature disabled; finally
// feed the printed encodings back through the disassembler and re-check.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
2+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
3+
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
4+
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
5+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
6+
// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST
7+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
8+
// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
9+
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
10+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
11+
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
12+
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \
13+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
14+
15+
smop4a za0.s, z0.h, z16.h // 10000000-00000000-10000000-00001000
16+
// CHECK-INST: smop4a za0.s, z0.h, z16.h
17+
// CHECK-ENCODING: [0x08,0x80,0x00,0x80]
18+
// CHECK-ERROR: instruction requires: sme2p2
19+
// CHECK-UNKNOWN: 80008008 <unknown>
20+
21+
smop4a za3.s, z12.h, z24.h // 10000000-00001000-10000001-10001011
22+
// CHECK-INST: smop4a za3.s, z12.h, z24.h
23+
// CHECK-ENCODING: [0x8b,0x81,0x08,0x80]
24+
// CHECK-ERROR: instruction requires: sme2p2
25+
// CHECK-UNKNOWN: 8008818b <unknown>
26+
27+
smop4a za3.s, z14.h, z30.h // 10000000-00001110-10000001-11001011
28+
// CHECK-INST: smop4a za3.s, z14.h, z30.h
29+
// CHECK-ENCODING: [0xcb,0x81,0x0e,0x80]
30+
// CHECK-ERROR: instruction requires: sme2p2
31+
// CHECK-UNKNOWN: 800e81cb <unknown>
32+
33+
smop4a za0.s, z0.h, {z16.h-z17.h} // 10000000-00010000-10000000-00001000
34+
// CHECK-INST: smop4a za0.s, z0.h, { z16.h, z17.h }
35+
// CHECK-ENCODING: [0x08,0x80,0x10,0x80]
36+
// CHECK-ERROR: instruction requires: sme2p2
37+
// CHECK-UNKNOWN: 80108008 <unknown>
38+
39+
smop4a za3.s, z12.h, {z24.h-z25.h} // 10000000-00011000-10000001-10001011
40+
// CHECK-INST: smop4a za3.s, z12.h, { z24.h, z25.h }
41+
// CHECK-ENCODING: [0x8b,0x81,0x18,0x80]
42+
// CHECK-ERROR: instruction requires: sme2p2
43+
// CHECK-UNKNOWN: 8018818b <unknown>
44+
45+
smop4a za3.s, z14.h, {z30.h-z31.h} // 10000000-00011110-10000001-11001011
46+
// CHECK-INST: smop4a za3.s, z14.h, { z30.h, z31.h }
47+
// CHECK-ENCODING: [0xcb,0x81,0x1e,0x80]
48+
// CHECK-ERROR: instruction requires: sme2p2
49+
// CHECK-UNKNOWN: 801e81cb <unknown>
50+
51+
smop4a za0.s, {z0.h-z1.h}, z16.h // 10000000-00000000-10000010-00001000
52+
// CHECK-INST: smop4a za0.s, { z0.h, z1.h }, z16.h
53+
// CHECK-ENCODING: [0x08,0x82,0x00,0x80]
54+
// CHECK-ERROR: instruction requires: sme2p2
55+
// CHECK-UNKNOWN: 80008208 <unknown>
56+
57+
smop4a za3.s, {z12.h-z13.h}, z24.h // 10000000-00001000-10000011-10001011
58+
// CHECK-INST: smop4a za3.s, { z12.h, z13.h }, z24.h
59+
// CHECK-ENCODING: [0x8b,0x83,0x08,0x80]
60+
// CHECK-ERROR: instruction requires: sme2p2
61+
// CHECK-UNKNOWN: 8008838b <unknown>
62+
63+
smop4a za3.s, {z14.h-z15.h}, z30.h // 10000000-00001110-10000011-11001011
64+
// CHECK-INST: smop4a za3.s, { z14.h, z15.h }, z30.h
65+
// CHECK-ENCODING: [0xcb,0x83,0x0e,0x80]
66+
// CHECK-ERROR: instruction requires: sme2p2
67+
// CHECK-UNKNOWN: 800e83cb <unknown>
68+
69+
smop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000000-00010000-10000010-00001000
70+
// CHECK-INST: smop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h }
71+
// CHECK-ENCODING: [0x08,0x82,0x10,0x80]
72+
// CHECK-ERROR: instruction requires: sme2p2
73+
// CHECK-UNKNOWN: 80108208 <unknown>
74+
75+
smop4a za3.s, {z12.h-z13.h}, {z24.h-z25.h} // 10000000-00011000-10000011-10001011
76+
// CHECK-INST: smop4a za3.s, { z12.h, z13.h }, { z24.h, z25.h }
77+
// CHECK-ENCODING: [0x8b,0x83,0x18,0x80]
78+
// CHECK-ERROR: instruction requires: sme2p2
79+
// CHECK-UNKNOWN: 8018838b <unknown>
80+
81+
smop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000000-00011110-10000011-11001011
82+
// CHECK-INST: smop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h }
83+
// CHECK-ENCODING: [0xcb,0x83,0x1e,0x80]
84+
// CHECK-ERROR: instruction requires: sme2p2
85+
// CHECK-UNKNOWN: 801e83cb <unknown>
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Assembler/disassembler test for SMOP4A with .h sources and a .d tile.
// The commands below, in order: assemble with +sme2p2,+sme-i16i64 and check
// the printed instruction and encoding; assemble without the features and
// expect a diagnostic; assemble to an object and disassemble it with the
// features enabled; disassemble the same object with sme2p2 disabled; finally
// feed the printed encodings back through the disassembler and re-check.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-i16i64 < %s \
2+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
3+
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
4+
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
5+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-i16i64 < %s \
6+
// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST
7+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-i16i64 < %s \
8+
// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
9+
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
10+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-i16i64 < %s \
11+
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
12+
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-i16i64 -disassemble -show-encoding \
13+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
14+
15+
smop4a za0.d, z0.h, z16.h // 10100000-11000000-00000000-00001000
16+
// CHECK-INST: smop4a za0.d, z0.h, z16.h
17+
// CHECK-ENCODING: [0x08,0x00,0xc0,0xa0]
18+
// CHECK-ERROR: instruction requires: sme2p2
19+
// CHECK-UNKNOWN: a0c00008 <unknown>
20+
21+
smop4a za5.d, z10.h, z20.h // 10100000-11000100-00000001-01001101
22+
// CHECK-INST: smop4a za5.d, z10.h, z20.h
23+
// CHECK-ENCODING: [0x4d,0x01,0xc4,0xa0]
24+
// CHECK-ERROR: instruction requires: sme2p2
25+
// CHECK-UNKNOWN: a0c4014d <unknown>
26+
27+
smop4a za7.d, z14.h, z30.h // 10100000-11001110-00000001-11001111
28+
// CHECK-INST: smop4a za7.d, z14.h, z30.h
29+
// CHECK-ENCODING: [0xcf,0x01,0xce,0xa0]
30+
// CHECK-ERROR: instruction requires: sme2p2
31+
// CHECK-UNKNOWN: a0ce01cf <unknown>
32+
33+
smop4a za0.d, z0.h, {z16.h-z17.h} // 10100000-11010000-00000000-00001000
34+
// CHECK-INST: smop4a za0.d, z0.h, { z16.h, z17.h }
35+
// CHECK-ENCODING: [0x08,0x00,0xd0,0xa0]
36+
// CHECK-ERROR: instruction requires: sme2p2
37+
// CHECK-UNKNOWN: a0d00008 <unknown>
38+
39+
smop4a za5.d, z10.h, {z20.h-z21.h} // 10100000-11010100-00000001-01001101
40+
// CHECK-INST: smop4a za5.d, z10.h, { z20.h, z21.h }
41+
// CHECK-ENCODING: [0x4d,0x01,0xd4,0xa0]
42+
// CHECK-ERROR: instruction requires: sme2p2
43+
// CHECK-UNKNOWN: a0d4014d <unknown>
44+
45+
smop4a za7.d, z14.h, {z30.h-z31.h} // 10100000-11011110-00000001-11001111
46+
// CHECK-INST: smop4a za7.d, z14.h, { z30.h, z31.h }
47+
// CHECK-ENCODING: [0xcf,0x01,0xde,0xa0]
48+
// CHECK-ERROR: instruction requires: sme2p2
49+
// CHECK-UNKNOWN: a0de01cf <unknown>
50+
51+
smop4a za0.d, {z0.h-z1.h}, z16.h // 10100000-11000000-00000010-00001000
52+
// CHECK-INST: smop4a za0.d, { z0.h, z1.h }, z16.h
53+
// CHECK-ENCODING: [0x08,0x02,0xc0,0xa0]
54+
// CHECK-ERROR: instruction requires: sme2p2
55+
// CHECK-UNKNOWN: a0c00208 <unknown>
56+
57+
smop4a za5.d, {z10.h-z11.h}, z20.h // 10100000-11000100-00000011-01001101
58+
// CHECK-INST: smop4a za5.d, { z10.h, z11.h }, z20.h
59+
// CHECK-ENCODING: [0x4d,0x03,0xc4,0xa0]
60+
// CHECK-ERROR: instruction requires: sme2p2
61+
// CHECK-UNKNOWN: a0c4034d <unknown>
62+
63+
smop4a za7.d, {z14.h-z15.h}, z30.h // 10100000-11001110-00000011-11001111
64+
// CHECK-INST: smop4a za7.d, { z14.h, z15.h }, z30.h
65+
// CHECK-ENCODING: [0xcf,0x03,0xce,0xa0]
66+
// CHECK-ERROR: instruction requires: sme2p2
67+
// CHECK-UNKNOWN: a0ce03cf <unknown>
68+
69+
smop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} // 10100000-11010000-00000010-00001000
70+
// CHECK-INST: smop4a za0.d, { z0.h, z1.h }, { z16.h, z17.h }
71+
// CHECK-ENCODING: [0x08,0x02,0xd0,0xa0]
72+
// CHECK-ERROR: instruction requires: sme2p2
73+
// CHECK-UNKNOWN: a0d00208 <unknown>
74+
75+
smop4a za5.d, {z10.h-z11.h}, {z20.h-z21.h} // 10100000-11010100-00000011-01001101
76+
// CHECK-INST: smop4a za5.d, { z10.h, z11.h }, { z20.h, z21.h }
77+
// CHECK-ENCODING: [0x4d,0x03,0xd4,0xa0]
78+
// CHECK-ERROR: instruction requires: sme2p2
79+
// CHECK-UNKNOWN: a0d4034d <unknown>
80+
81+
smop4a za7.d, {z14.h-z15.h}, {z30.h-z31.h} // 10100000-11011110-00000011-11001111
82+
// CHECK-INST: smop4a za7.d, { z14.h, z15.h }, { z30.h, z31.h }
83+
// CHECK-ENCODING: [0xcf,0x03,0xde,0xa0]
84+
// CHECK-ERROR: instruction requires: sme2p2
85+
// CHECK-UNKNOWN: a0de03cf <unknown>

0 commit comments

Comments
 (0)