Skip to content

Commit d0b5d32

Browse files
authored
[AMDGPU] Fixed byte_sel of v_cvt_f32_bf8/v_cvt_f32_fp8 (#80502)
The OPSEL bits were swapped. Actual byte-select table (Byte -> OPSEL): 0 -> 0, 1 -> 2, 2 -> 1, 3 -> 3.
1 parent b4c7152 commit d0b5d32

File tree

3 files changed

+8
-10
lines changed

3 files changed

+8
-10
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -668,10 +668,8 @@ class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
668668
VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
669669
(f32 (node i32:$src, index)),
670670
!if (index,
671-
(inst_e64 !if(index{0},
672-
!if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1),
673-
SRCMODS.OP_SEL_0),
674-
!if(index{1}, SRCMODS.OP_SEL_1, 0)),
671+
(inst_e64 !or(!if(index{0}, SRCMODS.OP_SEL_1, 0),
672+
!if(index{1}, SRCMODS.OP_SEL_0, 0)),
675673
$src, 0),
676674
(inst_e32 $src))
677675
>;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
1616
; GFX12: ; %bb.0:
1717
; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
1818
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
19-
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
19+
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
2020
; GFX12-NEXT: ; return to shader part epilog
2121
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
2222
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 1)
@@ -28,7 +28,7 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
2828
; GFX12: ; %bb.0:
2929
; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
3030
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
31-
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
31+
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
3232
; GFX12-NEXT: ; return to shader part epilog
3333
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
3434
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 2)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ define float @test_cvt_f32_bf8_byte1(i32 %a) {
4545
; GFX12-NEXT: s_wait_samplecnt 0x0
4646
; GFX12-NEXT: s_wait_bvhcnt 0x0
4747
; GFX12-NEXT: s_wait_kmcnt 0x0
48-
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
48+
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
4949
; GFX12-NEXT: s_setpc_b64 s[30:31]
5050
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
5151
ret float %ret
@@ -65,7 +65,7 @@ define float @test_cvt_f32_bf8_byte2(i32 %a) {
6565
; GFX12-NEXT: s_wait_samplecnt 0x0
6666
; GFX12-NEXT: s_wait_bvhcnt 0x0
6767
; GFX12-NEXT: s_wait_kmcnt 0x0
68-
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
68+
; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
6969
; GFX12-NEXT: s_setpc_b64 s[30:31]
7070
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
7171
ret float %ret
@@ -125,7 +125,7 @@ define float @test_cvt_f32_fp8_byte1(i32 %a) {
125125
; GFX12-NEXT: s_wait_samplecnt 0x0
126126
; GFX12-NEXT: s_wait_bvhcnt 0x0
127127
; GFX12-NEXT: s_wait_kmcnt 0x0
128-
; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[1,0]
128+
; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
129129
; GFX12-NEXT: s_setpc_b64 s[30:31]
130130
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
131131
ret float %ret
@@ -145,7 +145,7 @@ define float @test_cvt_f32_fp8_byte2(i32 %a) {
145145
; GFX12-NEXT: s_wait_samplecnt 0x0
146146
; GFX12-NEXT: s_wait_bvhcnt 0x0
147147
; GFX12-NEXT: s_wait_kmcnt 0x0
148-
; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
148+
; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[1,0]
149149
; GFX12-NEXT: s_setpc_b64 s[30:31]
150150
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
151151
ret float %ret

0 commit comments

Comments
 (0)