Skip to content

Commit 96813de

Browse files
authored
AMDGPU: Define a feature for v_dot4_f32_* instructions (#84248)
FeatureDot11Insts (dot11-insts) for: v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8
1 parent: 395bc79 · commit: 96813de

File tree

7 files changed

+28
-10
lines changed

7 files changed

+28
-10
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -256,10 +256,10 @@ TARGET_BUILTIN(__builtin_amdgcn_sudot4, "iIbiIbiiIb", "nc", "dot8-insts")
256256
TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts")
257257
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts")
258258
TARGET_BUILTIN(__builtin_amdgcn_sudot8, "iIbiIbiiIb", "nc", "dot8-insts")
259-
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "gfx12-insts")
260-
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "gfx12-insts")
261-
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "gfx12-insts")
262-
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "gfx12-insts")
259+
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "dot11-insts")
260+
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "dot11-insts")
261+
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "dot11-insts")
262+
TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "dot11-insts")
263263

264264
//===----------------------------------------------------------------------===//
265265
// GFX10+ only builtins.

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,8 +100,8 @@
100100
// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
101101
// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
102102
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
103-
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
104-
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
103+
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
104+
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
105105

106106
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
107107

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,8 @@ kernel void builtins_amdgcn_dl_insts_err(
5050
iOut[3] = __builtin_amdgcn_sudot8(false, A, true, B, C, false); // expected-error {{'__builtin_amdgcn_sudot8' needs target feature dot8-insts}}
5151
iOut[4] = __builtin_amdgcn_sudot8(true, A, false, B, C, true); // expected-error {{'__builtin_amdgcn_sudot8' needs target feature dot8-insts}}
5252

53-
fOut[5] = __builtin_amdgcn_dot4_f32_fp8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_bf8' needs target feature gfx12-insts}}
54-
fOut[6] = __builtin_amdgcn_dot4_f32_bf8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_fp8' needs target feature gfx12-insts}}
55-
fOut[7] = __builtin_amdgcn_dot4_f32_fp8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_fp8' needs target feature gfx12-insts}}
56-
fOut[8] = __builtin_amdgcn_dot4_f32_bf8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_bf8' needs target feature gfx12-insts}}
53+
fOut[5] = __builtin_amdgcn_dot4_f32_fp8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_bf8' needs target feature dot11-insts}}
54+
fOut[6] = __builtin_amdgcn_dot4_f32_bf8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_fp8' needs target feature dot11-insts}}
55+
fOut[7] = __builtin_amdgcn_dot4_f32_fp8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_fp8' needs target feature dot11-insts}}
56+
fOut[8] = __builtin_amdgcn_dot4_f32_bf8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_bf8' needs target feature dot11-insts}}
5757
}

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -650,6 +650,12 @@ def FeatureDot10Insts : SubtargetFeature<"dot10-insts",
650650
"Has v_dot2_f32_f16 instruction"
651651
>;
652652

653+
def FeatureDot11Insts : SubtargetFeature<"dot11-insts",
654+
"HasDot11Insts",
655+
"true",
656+
"Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions"
657+
>;
658+
653659
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
654660
"HasMAIInsts",
655661
"true",
@@ -1521,6 +1527,7 @@ def FeatureISAVersion12 : FeatureSet<
15211527
FeatureDot8Insts,
15221528
FeatureDot9Insts,
15231529
FeatureDot10Insts,
1530+
FeatureDot11Insts,
15241531
FeatureNSAEncoding,
15251532
FeaturePartialNSAEncoding,
15261533
FeatureWavefrontSize32,
@@ -2029,6 +2036,9 @@ def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">,
20292036
def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">,
20302037
AssemblerPredicate<(all_of FeatureDot10Insts)>;
20312038

2039+
def HasDot11Insts : Predicate<"Subtarget->hasDot11Insts()">,
2040+
AssemblerPredicate<(all_of FeatureDot11Insts)>;
2041+
20322042
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
20332043
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
20342044

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
153153
bool HasDot8Insts = false;
154154
bool HasDot9Insts = false;
155155
bool HasDot10Insts = false;
156+
bool HasDot11Insts = false;
156157
bool HasMAIInsts = false;
157158
bool HasFP8Insts = false;
158159
bool HasFP8ConversionInsts = false;
@@ -793,6 +794,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
793794
return HasDot10Insts;
794795
}
795796

797+
bool hasDot11Insts() const {
798+
return HasDot11Insts;
799+
}
800+
796801
bool hasMAIInsts() const {
797802
return HasMAIInsts;
798803
}

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -480,10 +480,12 @@ multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
480480
i32:$src2_modifiers, f32:$src2)>;
481481
}
482482

483+
let OtherPredicates = [HasDot11Insts] in {
483484
defm V_DOT4_F32_FP8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_bf8", int_amdgcn_dot4_f32_fp8_bf8>;
484485
defm V_DOT4_F32_BF8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_fp8", int_amdgcn_dot4_f32_bf8_fp8>;
485486
defm V_DOT4_F32_FP8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_fp8", int_amdgcn_dot4_f32_fp8_fp8>;
486487
defm V_DOT4_F32_BF8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_bf8", int_amdgcn_dot4_f32_bf8_bf8>;
488+
}
487489

488490
def : UDot2Pat<V_DOT2_U32_U16>;
489491
def : SDot2Pat<V_DOT2_I32_I16>;

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
318318
Features["dot8-insts"] = true;
319319
Features["dot9-insts"] = true;
320320
Features["dot10-insts"] = true;
321+
Features["dot11-insts"] = true;
321322
Features["dl-insts"] = true;
322323
Features["atomic-ds-pk-add-16-insts"] = true;
323324
Features["atomic-flat-pk-add-16-insts"] = true;

0 commit comments

Comments
 (0)