Skip to content

Commit a51798e

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen pat for fptrunc_round (#124044)
Add a true16 codegen pattern for fptrunc_round from f32 to f16. The MIR test is split into pre-GFX11 and post-GFX11 parts, and a true16 test and a fake16 test are added accordingly.
1 parent 7ceef1b commit a51798e

File tree

7 files changed

+672
-236
lines changed

7 files changed

+672
-236
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,16 +228,39 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
228228
// Pseudo instructions used for @llvm.fptrunc.round. The final codegen is done
229229
// in the ModeRegister pass.
230230
let Uses = [MODE, EXEC] in {
231+
let True16Predicate = NotHasTrue16BitInsts in
231232
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
232233
(ins VGPR_32:$src0, i32imm:$round)>;
233234

235+
let True16Predicate = UseFakeTrue16Insts in
236+
def FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 : VPseudoInstSI <(outs VGPR_32:$vdst),
237+
(ins VGPR_32:$src0, i32imm:$round)>;
238+
239+
let True16Predicate = UseRealTrue16Insts in
240+
// The operands of these pseudos should match V_CVT_F16_F32_t16_e64
241+
def FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 : VPseudoInstSI <(outs VOPDstOperand_t16:$vdst),
242+
(ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0, Clamp0:$clamp, omod0:$omod, op_sel0:$op_sel, i32imm:$round)> {
243+
let FPClamp = 1;
244+
let ClampLo = 1;
245+
let UseNamedOperandTable = 1;
246+
}
247+
234248
def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
235249
(ins VReg_64:$src0, i32imm:$round)>;
236250
} // End Uses = [MODE, EXEC]
237251

252+
let True16Predicate = NotHasTrue16BitInsts in
238253
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
239254
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
240255

256+
let True16Predicate = UseFakeTrue16Insts in
257+
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
258+
(FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>;
259+
260+
let True16Predicate = UseRealTrue16Insts in
261+
def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))),
262+
(FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>;
263+
241264
def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
242265
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
243266

llvm/lib/Target/AMDGPU/SIModeRegister.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
166166
unsigned Opcode = MI.getOpcode();
167167
if (TII->usesFPDPRounding(MI) ||
168168
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
169+
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
170+
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
169171
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
170172
switch (Opcode) {
171173
case AMDGPU::V_INTERP_P1LL_F16:
@@ -177,19 +179,19 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
177179
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
178180
unsigned Mode = MI.getOperand(2).getImm();
179181
MI.removeOperand(2);
180-
// Replacing the pseudo by a real instruction in place
181-
if (TII->getSubtarget().hasTrue16BitInsts()) {
182-
MachineBasicBlock &MBB = *MI.getParent();
183-
MachineInstrBuilder B(*MBB.getParent(), MI);
184-
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
185-
MachineOperand Src0 = MI.getOperand(1);
186-
MI.removeOperand(1);
187-
B.addImm(0); // src0_modifiers
188-
B.add(Src0); // re-add src0 operand
189-
B.addImm(0); // clamp
190-
B.addImm(0); // omod
191-
} else
192-
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
182+
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
183+
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
184+
}
185+
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
186+
unsigned Mode = MI.getOperand(2).getImm();
187+
MI.removeOperand(2);
188+
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
189+
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
190+
}
191+
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
192+
unsigned Mode = MI.getOperand(6).getImm();
193+
MI.removeOperand(6);
194+
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
193195
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
194196
}
195197
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
3+
4+
---
5+
name: ftrunc_tonearest
6+
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0
10+
; GFX11-LABEL: name: ftrunc_tonearest
11+
; GFX11: liveins: $sgpr0
12+
; GFX11-NEXT: {{ $}}
13+
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
14+
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
15+
; GFX11-NEXT: S_ENDPGM 0
16+
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
17+
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 0, implicit $mode, implicit $exec
18+
S_ENDPGM 0
19+
...
20+
21+
---
22+
name: ftrunc_upward
23+
24+
body: |
25+
bb.0:
26+
liveins: $sgpr0
27+
; GFX11-LABEL: name: ftrunc_upward
28+
; GFX11: liveins: $sgpr0
29+
; GFX11-NEXT: {{ $}}
30+
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
31+
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
32+
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
33+
; GFX11-NEXT: S_ENDPGM 0
34+
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
35+
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 1, implicit $mode, implicit $exec
36+
S_ENDPGM 0
37+
...
38+
39+
---
40+
name: ftrunc_downward
41+
42+
body: |
43+
bb.0:
44+
liveins: $sgpr0
45+
; GFX11-LABEL: name: ftrunc_downward
46+
; GFX11: liveins: $sgpr0
47+
; GFX11-NEXT: {{ $}}
48+
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
49+
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
50+
; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e32 $vgpr1, implicit $mode, implicit $exec
51+
; GFX11-NEXT: S_ENDPGM 0
52+
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
53+
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr1, 2, implicit $mode, implicit $exec
54+
S_ENDPGM 0
55+
...
56+
57+
---
58+
name: ftrunc_towardzero
59+
60+
body: |
61+
bb.0:
62+
liveins: $sgpr0
63+
; GFX11-LABEL: name: ftrunc_towardzero
64+
; GFX11: liveins: $sgpr0
65+
; GFX11-NEXT: {{ $}}
66+
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
67+
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
68+
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
69+
; GFX11-NEXT: S_ENDPGM 0
70+
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
71+
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 3, implicit $mode, implicit $exec
72+
S_ENDPGM 0
73+
...
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
3+
4+
---
5+
name: ftrunc_tonearest
6+
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0
10+
; GFX11-LABEL: name: ftrunc_tonearest
11+
; GFX11: liveins: $sgpr0
12+
; GFX11-NEXT: {{ $}}
13+
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
14+
; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
15+
; GFX11-NEXT: S_ENDPGM 0
16+
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
17+
$vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 0, implicit $mode, implicit $exec
18+
S_ENDPGM 0
19+
...
20+
21+
---
22+
name: ftrunc_upward
23+
24+
body: |
25+
bb.0:
26+
liveins: $sgpr0
27+
; GFX11-LABEL: name: ftrunc_upward
28+
; GFX11: liveins: $sgpr0
29+
; GFX11-NEXT: {{ $}}
30+
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
31+
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
32+
; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
33+
; GFX11-NEXT: S_ENDPGM 0
34+
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
35+
$vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 1, implicit $mode, implicit $exec
36+
S_ENDPGM 0
37+
...
38+
39+
---
40+
name: ftrunc_downward
41+
42+
body: |
43+
bb.0:
44+
liveins: $sgpr0
45+
; GFX11-LABEL: name: ftrunc_downward
46+
; GFX11: liveins: $sgpr0
47+
; GFX11-NEXT: {{ $}}
48+
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
49+
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
50+
; GFX11-NEXT: $vgpr0_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
51+
; GFX11-NEXT: S_ENDPGM 0
52+
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
53+
$vgpr0_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr1, 0, 0, 0, 2, implicit $mode, implicit $exec
54+
S_ENDPGM 0
55+
...
56+
57+
---
58+
name: ftrunc_towardzero
59+
60+
body: |
61+
bb.0:
62+
liveins: $sgpr0
63+
; GFX11-LABEL: name: ftrunc_towardzero
64+
; GFX11: liveins: $sgpr0
65+
; GFX11-NEXT: {{ $}}
66+
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
67+
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
68+
; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
69+
; GFX11-NEXT: S_ENDPGM 0
70+
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
71+
$vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 3, implicit $mode, implicit $exec
72+
S_ENDPGM 0
73+
...

llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
33
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
4-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
54

65
---
76
name: ftrunc_tonearest
@@ -15,13 +14,6 @@ body: |
1514
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
1615
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
1716
; CHECK-NEXT: S_ENDPGM 0
18-
;
19-
; GFX11-LABEL: name: ftrunc_tonearest
20-
; GFX11: liveins: $sgpr0
21-
; GFX11-NEXT: {{ $}}
22-
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
23-
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
24-
; GFX11-NEXT: S_ENDPGM 0
2517
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
2618
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 0, implicit $mode, implicit $exec
2719
S_ENDPGM 0
@@ -39,14 +31,6 @@ body: |
3931
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
4032
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
4133
; CHECK-NEXT: S_ENDPGM 0
42-
;
43-
; GFX11-LABEL: name: ftrunc_upward
44-
; GFX11: liveins: $sgpr0
45-
; GFX11-NEXT: {{ $}}
46-
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
47-
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
48-
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
49-
; GFX11-NEXT: S_ENDPGM 0
5034
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
5135
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 1, implicit $mode, implicit $exec
5236
S_ENDPGM 0
@@ -64,14 +48,6 @@ body: |
6448
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
6549
; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec
6650
; CHECK-NEXT: S_ENDPGM 0
67-
;
68-
; GFX11-LABEL: name: ftrunc_downward
69-
; GFX11: liveins: $sgpr0
70-
; GFX11-NEXT: {{ $}}
71-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
72-
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
73-
; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
74-
; GFX11-NEXT: S_ENDPGM 0
7551
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
7652
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr1, 2, implicit $mode, implicit $exec
7753
S_ENDPGM 0
@@ -89,14 +65,6 @@ body: |
8965
; CHECK-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
9066
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
9167
; CHECK-NEXT: S_ENDPGM 0
92-
;
93-
; GFX11-LABEL: name: ftrunc_towardzero
94-
; GFX11: liveins: $sgpr0
95-
; GFX11-NEXT: {{ $}}
96-
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
97-
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
98-
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
99-
; GFX11-NEXT: S_ENDPGM 0
10068
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
10169
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 3, implicit $mode, implicit $exec
10270
S_ENDPGM 0

0 commit comments

Comments
 (0)