Skip to content

Commit b964fee

Browse files
committed
[AMDGPU] Fix operand types for V_DOT2_F32_BF16
1 parent 24f0251 commit b964fee

File tree

7 files changed

+32 additions, -14 deletions

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
1515
// CHECK-NEXT: [[s2:%[0-9]+]] = bitcast <2 x i16> %v2ssB to <2 x bfloat>
1616
// CHECK-NEXT: [[s3:%[0-9]+]] = bitcast i16 %sC to bfloat
1717
// CHECK-NEXT: [[d:%[0-9]+]] = tail call bfloat @llvm.amdgcn.fdot2.bf16.bf16(<2 x bfloat> [[s1]], <2 x bfloat> [[s2]], bfloat [[s3]])
18-
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, float %fC, i1 false)
19-
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, float %fC, i1 true)
18+
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> [[s1]], <2 x bfloat> [[s2]], float %fC, i1 false)
19+
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> [[s1]], <2 x bfloat> [[s2]], float %fC, i1 true)
2020
// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
2121
// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
2222
// CHECK: call i32 @llvm.amdgcn.sudot4(i1 true, i32 %A, i1 false, i32 %B, i32 %C, i1 false)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2835,8 +2835,8 @@ def int_amdgcn_fdot2_f32_bf16 :
28352835
DefaultAttrsIntrinsic<
28362836
[llvm_float_ty], // %r
28372837
[
2838-
llvm_v2i16_ty, // %a
2839-
llvm_v2i16_ty, // %b
2838+
llvm_v2bf16_ty, // %a
2839+
llvm_v2bf16_ty, // %b
28402840
llvm_float_ty, // %c
28412841
llvm_i1_ty // %clamp
28422842
],

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2494,7 +2494,7 @@ def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
24942494

24952495
def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
24962496
def VOP_BF16_V2BF16_V2BF16_BF16: VOPProfile <[bf16, v2bf16, v2bf16, bf16]>;
2497-
def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;
2497+
def VOP_F32_V2BF16_V2BF16_F32 : VOPProfile <[f32, v2bf16, v2bf16, f32]>;
24982498

24992499
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
25002500

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
396396
} // End OtherPredicates = [HasDot1Insts]
397397

398398
def DOT2_BF16_Profile
399-
: VOP3P_Profile<VOP_F32_V2I16_V2I16_F32, VOP3_REGULAR, /*HasDPP*/ 1> {
399+
: VOP3P_Profile<VOP_F32_V2BF16_V2BF16_F32, VOP3_REGULAR, /*HasDPP*/ 1> {
400400
let HasSrc1Mods = 1;
401401
}
402402

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
33
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
44

5-
declare float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a, <2 x i16> %b, float %c, i1 %clamp)
5+
declare float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %c, i1 %clamp)
66

77
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp(
88
; GFX11-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp:
@@ -25,10 +25,10 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp(
2525
ptr addrspace(1) %b,
2626
ptr addrspace(1) %c) {
2727
entry:
28-
%a.val = load <2 x i16>, ptr addrspace(1) %a
29-
%b.val = load <2 x i16>, ptr addrspace(1) %b
28+
%a.val = load <2 x bfloat>, ptr addrspace(1) %a
29+
%b.val = load <2 x bfloat>, ptr addrspace(1) %b
3030
%c.val = load float, ptr addrspace(1) %c
31-
%r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a.val, <2 x i16> %b.val, float %c.val, i1 1)
31+
%r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a.val, <2 x bfloat> %b.val, float %c.val, i1 1)
3232
store float %r.val, ptr addrspace(1) %r
3333
ret void
3434
}
@@ -55,10 +55,10 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_no_clamp(
5555
ptr addrspace(1) %b,
5656
ptr addrspace(1) %c) {
5757
entry:
58-
%a.val = load <2 x i16>, ptr addrspace(1) %a
59-
%b.val = load <2 x i16>, ptr addrspace(1) %b
58+
%a.val = load <2 x bfloat>, ptr addrspace(1) %a
59+
%b.val = load <2 x bfloat>, ptr addrspace(1) %b
6060
%c.val = load float, ptr addrspace(1) %c
61-
%r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a.val, <2 x i16> %b.val, float %c.val, i1 0)
61+
%r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a.val, <2 x bfloat> %b.val, float %c.val, i1 0)
6262
store float %r.val, ptr addrspace(1) %r
6363
ret void
6464
}

llvm/test/MC/AMDGPU/bf16_imm.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,12 @@ v_dot2_bf16_bf16 v2, 1.0, v0, v2
1212

1313
v_dot2_bf16_bf16 v5, v1, v2, 1.0
1414
// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
15+
16+
v_dot2_f32_bf16 v2, v1, 1.0, v2
17+
// CHECK: v_dot2_f32_bf16 v2, v1, 1.0, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xe5,0x09,0x1c]
18+
19+
v_dot2_f32_bf16 v2, 0.5, v1, v2
20+
// CHECK: v_dot2_f32_bf16 v2, 0.5, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xf0,0x02,0x0a,0x1c]
21+
22+
v_dot2_f32_bf16 v2, 100.0, v1, v2
23+
// CHECK: v_dot2_f32_bf16 v2, 0x42c8, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00]

llvm/test/MC/Disassembler/AMDGPU/bf16_imm.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
# CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
55
0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00
66

7-
# FIXME: The decoded values of the following three cases are not correct because getInlineImmVal16 can't tell fp16 and bf16 apart.
7+
# CHECK: v_dot2_f32_bf16 v2, 0x42c8, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00]
8+
0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0xc8,0x42,0x00,0x00
9+
10+
# FIXME: The decoded values of the following cases are not correct because getInlineImmVal16 can't tell fp16 and bf16 apart.
811

912
# CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x3c00 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x00,0x3c,0x00,0x00]
1013
0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03
@@ -14,3 +17,9 @@
1417

1518
# CHECK: v_dot2_bf16_bf16 v2, 0x3c00, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x3c,0x00,0x00]
1619
0x02,0x00,0x67,0xd6,0xf2,0x00,0x0a,0x04
20+
21+
# CHECK: v_dot2_f32_bf16 v2, v1, 0x3c00, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00]
22+
0x02,0x40,0x1a,0xcc,0x01,0xe5,0x09,0x1c
23+
24+
# CHECK: v_dot2_f32_bf16 v2, 0x3800, v1, v2 ; encoding: [0x02,0x40,0x1a,0xcc,0xff,0x02,0x0a,0x1c,0x00,0x38,0x00,0x00]
25+
0x02,0x40,0x1a,0xcc,0xf0,0x02,0x0a,0x1c

0 commit comments

Comments
 (0)