-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[llvm][AArch64][Assembly]: Add LUT assembly/disassembly. #70802
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-aarch64 Author: None (hassnaaHamdi) ChangesThis patch adds the feature flags of LUT and SME_LUTv2, and the assembly/disassembly
That is according to this documentation: Patch is 47.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70802.diff 27 Files Affected:
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 232b3d6a6dbb1c4..5e6b3e9b300e85f 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -162,6 +162,8 @@ enum ArchExtKind : unsigned {
AEK_FPMR = 58, // FEAT_FPMR
AEK_FP8 = 59, // FEAT_FP8
AEK_FAMINMAX = 60, // FEAT_FAMINMAX
+ AEK_LUT = 61, // FEAT_LUT
+ AEK_SME_LUTv2 = 62, // FEAT_SME_LUTv2
AEK_NUM_EXTENSIONS
};
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -273,6 +275,8 @@ inline constexpr ExtensionInfo Extensions[] = {
{"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
{"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+fpmr", 0},
{"faminmax", AArch64::AEK_FAMINMAX, "+faminmax", "-faminmax", FEAT_INIT, "", 0},
+ {"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0},
+ {"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "+sme2,+fp8", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
};
diff --git a/llvm/include/llvm/TargetParser/SubtargetFeature.h b/llvm/include/llvm/TargetParser/SubtargetFeature.h
index e4dddfb78effbcd..2e1f00dad2df365 100644
--- a/llvm/include/llvm/TargetParser/SubtargetFeature.h
+++ b/llvm/include/llvm/TargetParser/SubtargetFeature.h
@@ -31,7 +31,7 @@ namespace llvm {
class raw_ostream;
class Triple;
-const unsigned MAX_SUBTARGET_WORDS = 4;
+const unsigned MAX_SUBTARGET_WORDS = 5;
const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
/// Container class for subtarget features.
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 8fd9358c9f9c7a0..d53abfd29b1edcb 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -517,6 +517,12 @@ def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",
"Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
+def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
+ "Enable Lookup Table instructions (FEAT_LUT)">;
+
+def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
+ "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)", [FeatureSME2, FeatureFP8]>;
+
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index a48bf77a774b75c..580275f72e40509 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8095,6 +8095,54 @@ multiclass SIMDTableLookupTied<bit op, string asm> {
V128, VecListFour128>;
}
+//----------------------------------------------------------------------------
+// AdvSIMD LUT
+//----------------------------------------------------------------------------
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookupIndexed<bit Q, bits<5> opc, RegisterOperand vectype,
+ RegisterOperand listtype, Operand idx_type,
+ string asm, string kind>
+ : I<(outs vectype:$Rd),
+ (ins listtype:$Rn, vectype:$Rm, idx_type:$idx),
+ asm, "\t$Rd" # kind # ", $Rn, $Rm$idx", "", []>,
+ Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = opc{4-3};
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-12} = opc{2-0};
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass BaseSIMDTableLookupIndexed2<string asm> {
+ def v16f8 : BaseSIMDTableLookupIndexed<0b1, {0b10,?,?,0b1}, V128, VecListOne16b, VectorIndexS, asm, ".16b"> {
+ bits<2> idx;
+ let Inst{14-13} = idx;
+ }
+ def v8f16 : BaseSIMDTableLookupIndexed<0b1, {0b11,?,?,?}, V128, VecListOne8h, VectorIndexH, asm, ".8h" > {
+ bits<3> idx;
+ let Inst{14-12} = idx;
+ }
+}
+
+multiclass BaseSIMDTableLookupIndexed4<string asm> {
+ def v16f8 : BaseSIMDTableLookupIndexed<0b1, {0b01,?,0b10}, V128, VecListOne16b, VectorIndexD, asm, ".16b"> {
+ bit idx;
+ let Inst{14} = idx;
+ }
+ def v8f16 : BaseSIMDTableLookupIndexed<0b1, {0b01,?,?,0b1}, V128, VecListTwo8h, VectorIndexS, asm, ".8h" > {
+ bits<2> idx;
+ let Inst{14-13} = idx;
+ }
+}
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 069a283dd311e50..ec972ce71cb1047 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -166,6 +166,10 @@ def HasFP8 : Predicate<"Subtarget->hasFP8()">,
AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
+def HasLUT : Predicate<"Subtarget->hasLUT()">,
+ AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
+def HasSME_LUTv2 : Predicate<"Subtarget->hasSME_LUTv2()">,
+ AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -5940,6 +5944,13 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
(v16i8 V128:$Ri), (v16i8 V128:$Rn))),
(TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
+//----------------------------------------------------------------------------
+// AdvSIMD LUT instructions
+//----------------------------------------------------------------------------
+let Predicates = [HasLUT] in {
+ defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
+ defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
+}
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index eb26591908fd79c..57ad51641a2f41e 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1252,6 +1252,10 @@ class ZPRVectorListMul<int ElementWidth, int NumRegs> : ZPRVectorList<ElementWid
let EncoderMethod = "EncodeRegAsMultipleOf<2>",
DecoderMethod = "DecodeZPR2Mul2RegisterClass" in {
+ def ZZ_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,0>"> {
+ let ParserMatchClass = ZPRVectorListMul<0, 2>;
+ }
+
def ZZ_b_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,'b'>"> {
let ParserMatchClass = ZPRVectorListMul<8, 2>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index f55b84b02f85162..7f568c9a225952e 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -885,3 +885,12 @@ defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
} //[HasSME2, HasFAMINMAX]
+
+let Predicates = [HasSME2, HasSME_LUTv2] in {
+defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
+def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
+} //[HasSME2, HasSME_LUTv2]
+
+let Predicates = [HasSME2p1, HasSME_LUTv2] in {
+def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
+} //[HasSME2p1, HasSME_LUTv2]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1a586765d58b3ca..f192cd7dbcc5135 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4029,3 +4029,12 @@ let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
} // End HasSVE2orSME2, HasFAMINMAX
+
+let Predicates = [HasSVE2orSME2, HasLUT] in {
+// LUTI2
+ defm LUTI2_ZZZI : sve2_luti2_vector_index<"luti2">;
+// LUTI4
+ defm LUTI4_ZZZI : sve2_luti4_vector_index<"luti4">;
+// LUTI4 (two contiguous registers)
+ defm LUTI4_Z2ZZI : sve2_luti4_vector_vg2_index<"luti4">;
+} // End HasSVE2orSME2, HasLUT
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 35abe3563eb81ab..874e46237f3707e 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3641,6 +3641,8 @@ static const struct Extension {
{"fpmr", {AArch64::FeatureFPMR}},
{"fp8", {AArch64::FeatureFP8}},
{"faminmax", {AArch64::FeatureFAMINMAX}},
+ {"lut", {AArch64::FeatureLUT}},
+ {"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -4536,7 +4538,7 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext()));
- Lex(); // Eat identifier token.
+ Lex(); // Eat register.
// Check if register is followed by an index
if (parseOptionalToken(AsmToken::LBrac)) {
@@ -4546,14 +4548,18 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
if (getParser().parseExpression(ImmVal))
return ParseStatus::NoMatch;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ Operands.push_back(AArch64Operand::CreateImm(
+ MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc,
+ getLoc(), getContext()));
if (!MCE)
return TokError("immediate value expected for vector index");
+ if (getTok().is(AsmToken::Comma)) {
+ Lex(); // eat comma
+ if (parseOptionalMulOperand(Operands))
+ return MatchOperand_ParseFail;
+ }
if (parseToken(AsmToken::RBrac, "']' expected"))
return ParseStatus::Failure;
-
- Operands.push_back(AArch64Operand::CreateImm(
- MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc,
- getLoc(), getContext()));
Operands.push_back(
AArch64Operand::CreateToken("]", getLoc(), getContext()));
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 988c78699179f0c..c5de5b4de4aef3a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1740,6 +1740,10 @@ template <unsigned NumLanes, char LaneKind>
void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ if (LaneKind == 0) {
+ printVectorList(MI, OpNum, STI, O, "");
+ return;
+ }
std::string Suffix(".");
if (NumLanes)
Suffix += itostr(NumLanes) + LaneKind;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index d8b44c68fbdee10..c94ec8004d0c570 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3059,6 +3059,25 @@ class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
let Inst{4-0} = Rt;
}
+// SME2 move vector to lookup table
+class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
+ : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
+ mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
+ "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<2> off2;
+ let Inst{31-14} = 0b110000000100111100;
+ let Inst{13-12} = off2;
+ let Inst{11-5} = opc;
+ let Inst{4-0} = Zt;
+}
+
+multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
+ def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
+ def : InstAlias<mnemonic # "\t$ZTt, $Zt",
+ (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
+}
+
//===----------------------------------------------------------------------===//
// SME2 lookup table expand one register
class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
@@ -4713,3 +4732,36 @@ class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
}
+
+// SME2 lookup table two source registers expand to four contiguous destination registers
+class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
+ : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
+ mnemonic, "\t$Zd, $ZTt, $Zn",
+ "", []>, Sched<[]> {
+ bits<4> Zn;
+ bits<3> Zd;
+ let Inst{31-14} = 0b110000001000101100;
+ let Inst{13-12} = sz;
+ let Inst{11-10} = op;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-2} = Zd;
+ let Inst{1-0} = 0b00;
+}
+
+// SME2 lookup table two source registers expand to four non-contiguous destination registers
+class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
+ : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
+ mnemonic, "\t$Zd, $ZTt, $Zn",
+ "", []>, Sched<[]> {
+ bits<4> Zn;
+ bits<3> Zd;
+ let Inst{31-14} = 0b110000001001101100;
+ let Inst{13-12} = sz;
+ let Inst{11-10} = op;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4} = Zd{2};
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Zd{1-0};
+}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index d2f72fda3a229b8..5c6c83b267fe46d 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10120,4 +10120,56 @@ class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;
+}
+
+// FP8 Look up table
+class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
+ Operand idx_ty, bits<4>opc, string mnemonic>
+ : I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx),
+ mnemonic, "\t$Zd, $Zn, $Zm$idx",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{22} = opc{3};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = opc{2-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+// FP8 Look up table read with 2-bit indices
+multiclass sve2_luti2_vector_index<string mnemonic> {
+ def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> {
+ bits<2> idx;
+ let Inst{23-22} = idx;
+ }
+ def _H : sve2_lut_vector_index<ZPR16, Z_h, VectorIndexH32b, {?,?,0b10}, mnemonic> {
+ bits<3> idx;
+ let Inst{23-22} = idx{2-1};
+ let Inst{12} = idx{0};
+ }
+}
+
+// FP8 Look up table read with 4-bit indices
+multiclass sve2_luti4_vector_index<string mnemonic> {
+ def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> {
+ bit idx;
+ let Inst{23} = idx;
+ }
+ def _H : sve2_lut_vector_index<ZPR16, Z_h, VectorIndexS32b, {?, 0b111}, mnemonic> {
+ bits<2> idx;
+ let Inst{23-22} = idx;
+ }
+}
+
+// FP8 Look up table read with 4-bit indices (two contiguous registers)
+multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
+ def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> {
+ bits<2> idx;
+ let Inst{23-22} = idx;
+ }
}
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
index 86525ff68134c91..33cd713634f1bc5 100644
--- a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -12,3 +12,20 @@ famax v31.4h, v31.4h, v31.4h
// CHECK: error: instruction requires: faminmax
// CHECK: famax v31.4h, v31.4h, v31.4h
+.arch armv9-a+lut
+.arch armv9-a+nolut
+luti2 v30.8h, { v20.8h }, v31[7]
+// CHECK: error: instruction requires: lut
+// CHECK: luti2 v30.8h, { v20.8h }, v31[7]
+
+.arch armv9-a+sve2+lut
+.arch armv9-a+nosve2+nolut
+luti2 z0.h, { z0.h }, z0[0]
+// CHECK: error: instruction requires: lut sve2 or sme2
+// CHECK: luti2 z0.h, { z0.h }, z0[0]
+
+.arch armv9-a+sme-lutv2
+.arch armv9-a+nosme-lutv2
+luti4 { z0.b - z3.b }, zt0, { z0, z1 }
+// CHECK: error: instruction requires: sme2 sme-lutv2
+// CHECK: luti4 { z0.b - z3.b }, zt0, { z0, z1 }
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
index e3f0a94c2ff9a37..b1d2aa9b0b7230b 100644
--- a/llvm/test/MC/AArch64/FP8/directive-arch.s
+++ b/llvm/test/MC/AArch64/FP8/directive-arch.s
@@ -3,11 +3,24 @@
.arch armv9-a+fp8
bf1cvtl v0.8h, v0.8b
// CHECK: bf1cvtl v0.8h, v0.8b
-
.arch armv9-a+nofp8
+
.arch armv9-a+faminmax
famax v31.4h, v31.4h, v31.4h
// CHECK: famax v31.4h, v31.4h, v31.4h
-
.arch armv9-a+nofaminmax
+.arch armv9-a+lut
+luti2 v30.8h, {v20.8h}, v31[7]
+// CHECK: luti2 v30.8h, { v20.8h }, v31[7]
+.arch armv9-a+nolut
+
+.arch armv9-a+sve2+lut
+luti2 z0.h, {z0.h}, z0[0]
+// CHECK: luti2 z0.h, { z0.h }, z0[0]
+.arch armv9-a+nosve2+nolut
+
+.arch armv9-a+sme-lutv2
+luti4 {z0.b-z3.b}, zt0, {z0-z1}
+// CHECK: luti4 { z0.b - z3.b }, zt0, { z0, z1 }
+.arch armv9-a+nosme-lutv2
diff --git a/llvm/test/MC/AArch64/FP8/luti2-diagnostics.s b/llvm/test/MC/AArch64/FP8/luti2-diagnostics.s
new file mode 100644
index 000000000000000..1f32fd8b0c027ed
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/luti2-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+lut 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid lane indices
+
+luti2 v2.16b, {v1.16b}, v0[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti2 v2.16b, {v1.16b}, v0[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v3.16b, {v2.16b}, v1[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti2 v3.16b, {v2.16b}, v1[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v30.8h, {v21.8h}, v20[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: luti2 v30.8h, {v21.8h}, v20[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v31.8h, {v31.8h}, v31[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: luti2 v31.8h, {v31.8h}, v31[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector suffix
+
+luti2 v2.8h, {v1.16b}, v0[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti2 v2.8h, {v1.16b}, v0[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v31.16b, {v31.8h}, v31[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti2 v31.16b, {v31.8h}, v31[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8/luti2.s b/llvm/test/MC/AArch64/FP8/luti2.s
new file mode 100644
index 000000000000000..c5f99f0fb87cb15
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/luti2.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+lut < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Hassnaa for the work. I left some comments.
This patch adds the feature flags of LUT and SME_LUTv2, and the assembly/disassembly for the following instructions of NEON, SVE2 and SME2: * NEON: - LUT2 - LUT4 * SVE2: - LUTI2_ZZZI - LUTI4_ZZZI - LUTI4_Z2ZZI * SME: - MOVT - LUTI4_4ZZT2Z - LUTI4_S_4ZZT2Z That is according to this documentation: https://developer.arm.com/documentation/ddi0602/2023-09 Change-Id: I10cc46e3fe7efb9f280e26e065b22908d3a0725c
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Hassnaa for the work.
LGTM
This patch adds the feature flags of LUT and SME_LUTv2, and the assembly/disassembly
for the following instructions of NEON, SVE2 and SME2:
That is according to this documentation:
https://developer.arm.com/documentation/ddi0602/2023-09