Skip to content

Commit 9789fb5

Browse files
jayfoadsrpande
authored andcommitted
[AMDGPU] Do not emit V_DOT2C_F32_F16_e32 on GFX12 (llvm#78709)
That instruction is not supported on GFX12. Added a testcase which previously crashed without this change. Co-authored-by: pvanhout <[email protected]> This is cherry picked from amd-staging into ROCm-6.2.1, and amd-mainline-open. Change-Id: I2256637ab6b5e7c165d4055a0af39413e3f23a8b
1 parent ab652c3 commit 9789fb5

File tree

4 files changed

+6
-4
lines changed

4 files changed

+6
-4
lines changed

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@
102102
// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
103103
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
104104
// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
105-
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
106-
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
105+
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
106+
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
107107

108108
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
109109

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1522,7 +1522,6 @@ def FeatureISAVersion12 : FeatureSet<
15221522
[FeatureGFX12,
15231523
FeatureLDSBankCount32,
15241524
FeatureDLInsts,
1525-
FeatureDot5Insts,
15261525
FeatureDot7Insts,
15271526
FeatureDot8Insts,
15281527
FeatureDot9Insts,

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
277277
case GK_GFX1201:
278278
case GK_GFX1200:
279279
Features["ci-insts"] = true;
280-
Features["dot5-insts"] = true;
281280
Features["dot7-insts"] = true;
282281
Features["dot8-insts"] = true;
283282
Features["dot9-insts"] = true;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
44
; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
55
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX12
67

78
declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp)
89

910
; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp
1011
; GFX9: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
1112
; GFX10: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
13+
; GFX12: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
1214
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp(
1315
ptr addrspace(1) %r,
1416
ptr addrspace(1) %a,
@@ -27,6 +29,7 @@ entry:
2729
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
2830
; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
2931
; GFX10: {{v_dot2c_f32_f16|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
32+
; GFX12: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
3033
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
3134
ptr addrspace(1) %r,
3235
ptr addrspace(1) %a,
@@ -44,6 +47,7 @@ entry:
4447
; GFX9-LABEL: {{^}}fdot2_inline_literal
4548
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
4649
; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
50+
; GFX12: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
4751
define float @fdot2_inline_literal(<2 x half> %a, <2 x half> %b) {
4852
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
4953
ret float %ret

0 commit comments

Comments
 (0)