Skip to content

Commit df04883

Browse files
committed
[AMDGPU] Split dot7 feature
Differential Revision: https://reviews.llvm.org/D142507
1 parent 7fd3ed9 commit df04883

File tree

7 files changed: 51 additions (+51) and 24 deletions (-24).

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "gfx9
236236
// Deep learning builtins.
237237
//===----------------------------------------------------------------------===//
238238

239-
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot7-insts")
239+
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot10-insts")
240240
TARGET_BUILTIN(__builtin_amdgcn_fdot2_f16_f16, "hV2hV2hh", "nc", "dot9-insts")
241241
TARGET_BUILTIN(__builtin_amdgcn_fdot2_bf16_bf16, "sV2sV2ss", "nc", "dot9-insts")
242242
TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, "fV2sV2sfIb", "nc", "dot9-insts")

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
197197
Features["dot7-insts"] = true;
198198
Features["dot8-insts"] = true;
199199
Features["dot9-insts"] = true;
200+
Features["dot10-insts"] = true;
200201
Features["dl-insts"] = true;
201202
Features["16-bit-insts"] = true;
202203
Features["dpp"] = true;
@@ -220,6 +221,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
220221
Features["dot5-insts"] = true;
221222
Features["dot6-insts"] = true;
222223
Features["dot7-insts"] = true;
224+
Features["dot10-insts"] = true;
223225
Features["dl-insts"] = true;
224226
Features["16-bit-insts"] = true;
225227
Features["dpp"] = true;
@@ -237,6 +239,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
237239
Features["dot5-insts"] = true;
238240
Features["dot6-insts"] = true;
239241
Features["dot7-insts"] = true;
242+
Features["dot10-insts"] = true;
240243
[[fallthrough]];
241244
case GK_GFX1013:
242245
case GK_GFX1010:
@@ -270,6 +273,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
270273
Features["dot1-insts"] = true;
271274
Features["dot2-insts"] = true;
272275
Features["dot7-insts"] = true;
276+
Features["dot10-insts"] = true;
273277
[[fallthrough]];
274278
case GK_GFX90C:
275279
case GK_GFX909:

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,27 +69,27 @@
6969
// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7070
// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7171
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
72-
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
73-
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
72+
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
73+
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7474
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
75-
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
75+
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7676
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
77-
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
77+
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7878
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
79-
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
80-
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
79+
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
80+
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
8181
// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
82-
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
83-
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
84-
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
85-
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
86-
// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
87-
// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
88-
// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
89-
// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
90-
// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
91-
// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
92-
// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
93-
// GFX1103-W64: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
82+
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
83+
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
84+
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
85+
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
86+
// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
87+
// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
88+
// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
89+
// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
90+
// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
91+
// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
92+
// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
93+
// GFX1103-W64: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
9494

9595
kernel void test() {}

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ kernel void builtins_amdgcn_dl_insts_err(
1616
short2 v2ssA, short2 v2ssB, short sC, int siA, int siB, int siC,
1717
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC,
1818
int A, int B, int C) {
19-
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
20-
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
19+
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot10-insts}}
20+
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot10-insts}}
2121

2222
hOut[0] = __builtin_amdgcn_fdot2_f16_f16(v2hA, v2hB, hC); // expected-error {{'__builtin_amdgcn_fdot2_f16_f16' needs target feature dot9-insts}}
2323

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
581581
def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
582582
"HasDot7Insts",
583583
"true",
584-
"Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
584+
"Has v_dot4_u32_u8, v_dot8_u32_u4 instructions"
585585
>;
586586

587587
def FeatureDot8Insts : SubtargetFeature<"dot8-insts",
@@ -596,6 +596,12 @@ def FeatureDot9Insts : SubtargetFeature<"dot9-insts",
596596
"Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions"
597597
>;
598598

599+
def FeatureDot10Insts : SubtargetFeature<"dot10-insts",
600+
"HasDot10Insts",
601+
"true",
602+
"Has v_dot2_f32_f16 instruction"
603+
>;
604+
599605
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
600606
"HasMAIInsts",
601607
"true",
@@ -1081,6 +1087,7 @@ def FeatureISAVersion9_0_6 : FeatureSet<
10811087
FeatureDot1Insts,
10821088
FeatureDot2Insts,
10831089
FeatureDot7Insts,
1090+
FeatureDot10Insts,
10841091
FeatureSupportsSRAMECC,
10851092
FeatureImageGather4D16Bug]>;
10861093

@@ -1101,6 +1108,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
11011108
FeatureDot5Insts,
11021109
FeatureDot6Insts,
11031110
FeatureDot7Insts,
1111+
FeatureDot10Insts,
11041112
FeatureMAIInsts,
11051113
FeaturePkFmacF16Inst,
11061114
FeatureAtomicFaddNoRtnInsts,
@@ -1133,6 +1141,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
11331141
FeatureDot5Insts,
11341142
FeatureDot6Insts,
11351143
FeatureDot7Insts,
1144+
FeatureDot10Insts,
11361145
Feature64BitDPP,
11371146
FeaturePackedFP32Ops,
11381147
FeatureMAIInsts,
@@ -1172,6 +1181,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
11721181
FeatureDot5Insts,
11731182
FeatureDot6Insts,
11741183
FeatureDot7Insts,
1184+
FeatureDot10Insts,
11751185
Feature64BitDPP,
11761186
FeaturePackedFP32Ops,
11771187
FeatureMAIInsts,
@@ -1233,6 +1243,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
12331243
FeatureDot5Insts,
12341244
FeatureDot6Insts,
12351245
FeatureDot7Insts,
1246+
FeatureDot10Insts,
12361247
FeatureNSAEncoding,
12371248
FeatureNSAMaxSize5,
12381249
FeatureWavefrontSize32,
@@ -1256,6 +1267,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
12561267
FeatureDot5Insts,
12571268
FeatureDot6Insts,
12581269
FeatureDot7Insts,
1270+
FeatureDot10Insts,
12591271
FeatureNSAEncoding,
12601272
FeatureNSAMaxSize5,
12611273
FeatureWavefrontSize32,
@@ -1300,6 +1312,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
13001312
FeatureDot5Insts,
13011313
FeatureDot6Insts,
13021314
FeatureDot7Insts,
1315+
FeatureDot10Insts,
13031316
FeatureNSAEncoding,
13041317
FeatureNSAMaxSize13,
13051318
FeatureWavefrontSize32,
@@ -1314,6 +1327,7 @@ def FeatureISAVersion11_Common : FeatureSet<
13141327
FeatureDot7Insts,
13151328
FeatureDot8Insts,
13161329
FeatureDot9Insts,
1330+
FeatureDot10Insts,
13171331
FeatureNSAEncoding,
13181332
FeatureNSAMaxSize5,
13191333
FeatureWavefrontSize32,
@@ -1766,6 +1780,9 @@ def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">,
17661780
def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">,
17671781
AssemblerPredicate<(all_of FeatureDot9Insts)>;
17681782

1783+
def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">,
1784+
AssemblerPredicate<(all_of FeatureDot10Insts)>;
1785+
17691786
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
17701787
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
17711788

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
146146
bool HasDot7Insts = false;
147147
bool HasDot8Insts = false;
148148
bool HasDot9Insts = false;
149+
bool HasDot10Insts = false;
149150
bool HasMAIInsts = false;
150151
bool HasFP8Insts = false;
151152
bool HasPkFmacF16Inst = false;
@@ -738,6 +739,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
738739
return HasDot9Insts;
739740
}
740741

742+
bool hasDot10Insts() const {
743+
return HasDot10Insts;
744+
}
745+
741746
bool hasMAIInsts() const {
742747
return HasMAIInsts;
743748
}

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -337,11 +337,12 @@ defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
337337

338338
} // End SubtargetPredicate = HasDot2Insts
339339

340-
let SubtargetPredicate = HasDot7Insts in {
341-
340+
let SubtargetPredicate = HasDot10Insts in
342341
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
343342
VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
344343
AMDGPUfdot2, 1/*ExplicitClamp*/>;
344+
345+
let SubtargetPredicate = HasDot7Insts in {
345346
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
346347
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
347348
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",

0 commit comments

Comments
 (0)