Skip to content

Commit 7da1905

Browse files
committed
[AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics
This patch upstreams support for the Armv8.6-a Matrix Multiplication Extension. A summary of the features can be found here: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a This patch includes: - Assembly support for AArch32 - Intrinsics Support for AArch32 Neon Intrinsics for Matrix Multiplication Note: these extensions are optional in the 8.6a architecture and so have to be enabled by default No additional IR types or C Types are needed for this extension. This is part of a patch series, starting with BFloat16 support and the other components in the armv8.6a extension (in previous patches linked in phabricator) Based on work by: - Luke Geeson - Oliver Stannard - Luke Cheeseman Reviewers: t.p.northover, miyuki Reviewed By: miyuki Subscribers: miyuki, ostannard, kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77872
1 parent 832cd74 commit 7da1905

File tree

10 files changed

+286
-8
lines changed

10 files changed

+286
-8
lines changed

clang/lib/Basic/Targets/ARM.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
425425
// Note that SoftFloatABI is initialized in our constructor.
426426
HWDiv = 0;
427427
DotProd = 0;
428+
HasMatMul = 0;
428429
HasFloat16 = true;
429430
ARMCDECoprocMask = 0;
430431

@@ -491,6 +492,8 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
491492
FPU |= FPARMV8;
492493
MVE |= MVE_INT | MVE_FP;
493494
HW_FP |= HW_FP_SP | HW_FP_HP;
495+
} else if (Feature == "+i8mm") {
496+
HasMatMul = 1;
494497
} else if (Feature.size() == strlen("+cdecp0") && Feature >= "+cdecp0" &&
495498
Feature <= "+cdecp7") {
496499
unsigned Coproc = Feature.back() - '0';
@@ -820,6 +823,9 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
820823
if (DotProd)
821824
Builder.defineMacro("__ARM_FEATURE_DOTPROD", "1");
822825

826+
if (HasMatMul)
827+
Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1");
828+
823829
switch (ArchKind) {
824830
default:
825831
break;

clang/lib/Basic/Targets/ARM.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
7575
unsigned DSP : 1;
7676
unsigned Unaligned : 1;
7777
unsigned DotProd : 1;
78+
unsigned HasMatMul : 1;
7879

7980
enum {
8081
LDREX_B = (1 << 0), /// byte (8-bit)

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4807,6 +4807,7 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
48074807
NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
48084808
NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
48094809
NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4810+
NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0),
48104811
NEONMAP0(vmovl_v),
48114812
NEONMAP0(vmovn_v),
48124813
NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
@@ -4914,6 +4915,9 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
49144915
NEONMAP0(vtrnq_v),
49154916
NEONMAP0(vtst_v),
49164917
NEONMAP0(vtstq_v),
4918+
NEONMAP1(vusdot_v, arm_neon_usdot, 0),
4919+
NEONMAP1(vusdotq_v, arm_neon_usdot, 0),
4920+
NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0),
49174921
NEONMAP0(vuzp_v),
49184922
NEONMAP0(vuzpq_v),
49194923
NEONMAP0(vzip_v),
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +fullfp16 -target-feature +i8mm \
2+
// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
3+
// RUN: | opt -S -mem2reg -sroa \
4+
// RUN: | FileCheck %s
5+
6+
// REQUIRES: arm-registered-target
7+
8+
#include <arm_neon.h>
9+
10+
// CHECK-LABEL: test_vmmlaq_s32
11+
// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
12+
// CHECK: ret <4 x i32> [[VAL]]
13+
int32x4_t test_vmmlaq_s32(int32x4_t r, int8x16_t a, int8x16_t b) {
14+
return vmmlaq_s32(r, a, b);
15+
}
16+
17+
// CHECK-LABEL: test_vmmlaq_u32
18+
// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
19+
// CHECK: ret <4 x i32> [[VAL]]
20+
uint32x4_t test_vmmlaq_u32(uint32x4_t r, uint8x16_t a, uint8x16_t b) {
21+
return vmmlaq_u32(r, a, b);
22+
}
23+
24+
// CHECK-LABEL: test_vusmmlaq_s32
25+
// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
26+
// CHECK: ret <4 x i32> [[VAL]]
27+
int32x4_t test_vusmmlaq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
28+
return vusmmlaq_s32(r, a, b);
29+
}
30+
31+
// CHECK-LABEL: test_vusdot_s32
32+
// CHECK: [[VAL:%.*]] = call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b)
33+
// CHECK: ret <2 x i32> [[VAL]]
34+
int32x2_t test_vusdot_s32(int32x2_t r, uint8x8_t a, int8x8_t b) {
35+
return vusdot_s32(r, a, b);
36+
}
37+
38+
// CHECK-LABEL: test_vusdot_lane_s32
39+
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
40+
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
41+
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
42+
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
43+
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
44+
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> %r to <8 x i8>
45+
// CHECK: [[OP:%.*]] = call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> [[TMP3]])
46+
// CHECK: ret <2 x i32> [[OP]]
47+
int32x2_t test_vusdot_lane_s32(int32x2_t r, uint8x8_t a, int8x8_t b) {
48+
return vusdot_lane_s32(r, a, b, 0);
49+
}
50+
51+
// CHECK-LABEL: test_vsudot_lane_s32
52+
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
53+
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
54+
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
55+
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
56+
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
57+
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> %r to <8 x i8>
58+
// CHECK: [[OP:%.*]] = call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> [[TMP3]], <8 x i8> %a)
59+
// CHECK: ret <2 x i32> [[OP]]
60+
int32x2_t test_vsudot_lane_s32(int32x2_t r, int8x8_t a, uint8x8_t b) {
61+
return vsudot_lane_s32(r, a, b, 0);
62+
}
63+
64+
// CHECK-LABEL: test_vusdotq_lane_s32
65+
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
66+
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
67+
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
68+
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
69+
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
70+
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
71+
// CHECK: [[OP:%.*]] = call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> [[TMP4]])
72+
// CHECK: ret <4 x i32> [[OP]]
73+
int32x4_t test_vusdotq_lane_s32(int32x4_t r, uint8x16_t a, int8x8_t b) {
74+
return vusdotq_lane_s32(r, a, b, 0);
75+
}
76+
77+
// CHECK-LABEL: test_vsudotq_lane_s32
78+
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
79+
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
80+
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
81+
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
82+
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> %r to <16 x i8>
83+
// CHECK: [[OP:%.*]] = call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %3, <16 x i8> %a)
84+
// CHECK: ret <4 x i32> [[OP]]
85+
int32x4_t test_vsudotq_lane_s32(int32x4_t r, int8x16_t a, uint8x8_t b) {
86+
return vsudotq_lane_s32(r, a, b, 0);
87+
}

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,19 @@ class Neon_Dot_Intrinsic
773773
def int_arm_neon_udot : Neon_Dot_Intrinsic;
774774
def int_arm_neon_sdot : Neon_Dot_Intrinsic;
775775

776+
// v8.6-A Matrix Multiply Intrinsics
777+
class Neon_MatMul_Intrinsic
778+
: Intrinsic<[llvm_anyvector_ty],
779+
[LLVMMatchType<0>, llvm_anyvector_ty,
780+
LLVMMatchType<1>],
781+
[IntrNoMem]>;
782+
def int_arm_neon_ummla : Neon_MatMul_Intrinsic;
783+
def int_arm_neon_smmla : Neon_MatMul_Intrinsic;
784+
def int_arm_neon_usmmla : Neon_MatMul_Intrinsic;
785+
def int_arm_neon_usdot : Neon_Dot_Intrinsic;
786+
787+
// v8.6-A Bfloat Intrinsics
788+
776789
def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
777790
def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
778791

llvm/lib/Target/ARM/ARM.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
428428
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
429429
"Enable support for BFloat16 instructions", [FeatureNEON]>;
430430

431+
def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
432+
"true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>;
433+
431434
// Armv8.1-M extensions
432435

433436
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
@@ -529,7 +532,8 @@ def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
529532

530533
def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
531534
"Support ARM v8.6a instructions",
532-
[HasV8_5aOps, FeatureBF16]>;
535+
[HasV8_5aOps, FeatureBF16,
536+
FeatureMatMulInt8]>;
533537

534538
def HasV8_1MMainlineOps : SubtargetFeature<
535539
"v8.1m.main", "HasV8_1MMainlineOps", "true",

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4823,10 +4823,10 @@ def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
48234823
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
48244824
// encodings are the same and thus no further bit twiddling is necessary
48254825
// in the disassembler.
4826-
class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
4827-
ValueType AccumTy, ValueType InputTy,
4826+
class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
4827+
string AsmTy, ValueType AccumTy, ValueType InputTy,
48284828
SDPatternOperator OpNode> :
4829-
N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4829+
N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
48304830
(ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
48314831
Asm, AsmTy,
48324832
[(set (AccumTy RegTy:$dst),
@@ -4838,10 +4838,19 @@ class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
48384838
let Constraints = "$dst = $Vd";
48394839
}
48404840

4841-
def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
4842-
def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
4843-
def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4844-
def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
4841+
4842+
class VUSDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
4843+
string AsmTy, ValueType AccumTy, ValueType InputTy,
4844+
SDPatternOperator OpNode> :
4845+
VDOT<op6, op4, op23, RegTy, Asm, AsmTy, AccumTy, InputTy, OpNode> {
4846+
let hasNoSchedulingInfo = 1;
4847+
4848+
}
4849+
4850+
def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
4851+
def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
4852+
def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4853+
def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
48454854

48464855
// Indexed dot product instructions:
48474856
multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
@@ -4876,6 +4885,70 @@ defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
48764885
defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
48774886
int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
48784887

4888+
// v8.6A matrix multiplication extension
4889+
let Predicates = [HasMatMulInt8] in {
4890+
class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
4891+
SDPatternOperator OpNode>
4892+
: N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
4893+
(ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
4894+
Asm, AsmTy,
4895+
[(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
4896+
(v16i8 QPR:$Vn),
4897+
(v16i8 QPR:$Vm)))]> {
4898+
let DecoderNamespace = "VFPV8";
4899+
let Constraints = "$dst = $Vd";
4900+
let hasNoSchedulingInfo = 1;
4901+
}
4902+
4903+
multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy,
4904+
ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode,
4905+
dag RHS> {
4906+
4907+
def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
4908+
(ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
4909+
NoItinerary, Asm, AsmTy, []> {
4910+
bit lane;
4911+
let hasNoSchedulingInfo = 1;
4912+
let Inst{5} = lane;
4913+
let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
4914+
let DecoderNamespace = "VFPV8";
4915+
let Constraints = "$dst = $Vd";
4916+
}
4917+
4918+
def : Pat<
4919+
(AccumTy (OpNode (AccumTy RegTy:$Vd),
4920+
(InputTy RegTy:$Vn),
4921+
(InputTy (bitconvert (AccumTy
4922+
(ARMvduplane (AccumTy RegTy:$Vm),
4923+
VectorIndex32:$lane)))))),
4924+
(!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4925+
4926+
}
4927+
4928+
multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS>
4929+
: N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> {
4930+
def : Pat<
4931+
(AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
4932+
(InputTy (bitconvert (AccumTy
4933+
(ARMvduplane (AccumTy RegTy:$Vm),
4934+
VectorIndex32:$lane)))),
4935+
(InputTy RegTy:$Vn))),
4936+
(!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4937+
}
4938+
4939+
def VSMMLA : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>;
4940+
def VUMMLA : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>;
4941+
def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
4942+
def VUSDOTD : VUSDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8, int_arm_neon_usdot>;
4943+
def VUSDOTQ : VUSDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;
4944+
4945+
defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
4946+
int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
4947+
defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
4948+
int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4949+
defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
4950+
defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4951+
}
48794952

48804953
// ARMv8.3 complex operations
48814954
class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,

llvm/lib/Target/ARM/ARMPredicates.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
110110
AssemblerPredicate<(all_of FeatureFP16FML),"full half-float fml">;
111111
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
112112
AssemblerPredicate<(all_of FeatureBF16),"BFloat16 floating point extension">;
113+
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
114+
AssemblerPredicate<(all_of FeatureMatMulInt8),"8-bit integer matrix multiply">;
113115
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
114116
AssemblerPredicate<(all_of FeatureHWDivThumb), "divide in THUMB">;
115117
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
260260
/// HasBF16 - True if subtarget supports BFloat16 floating point operations
261261
bool HasBF16 = false;
262262

263+
/// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply
264+
bool HasMatMulInt8 = false;
265+
263266
/// HasD32 - True if subtarget has the full 32 double precision
264267
/// FP registers for VFPv3.
265268
bool HasD32 = false;
@@ -704,6 +707,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
704707
/// Return true if the CPU supports any kind of instruction fusion.
705708
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
706709

710+
bool hasMatMulInt8() const { return HasMatMulInt8; }
711+
707712
const Triple &getTargetTriple() const { return TargetTriple; }
708713

709714
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }

0 commit comments

Comments
 (0)