Skip to content

Commit 27734be

Browse files
perlfu authored and shiltian committed
[AMDGPU] Disable inline constants for pseudo scalar transcendentals (llvm#104395)
Prevent operand folding from inlining constants into pseudo scalar transcendental f16 instructions. Literal constants, however, are still allowed. (cherry picked from commit fc6300a) Change-Id: I5cd412741939cc812150dbb24bd2735a64573b70
1 parent d0eeb21 commit 27734be

File tree

4 files changed

+138
-0
lines changed

4 files changed

+138
-0
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12181218
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
12191219
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
12201220

1221+
/// \returns true if inline constants are not supported for F16 pseudo
1222+
/// scalar transcendentals.
1223+
bool hasNoF16PseudoScalarTransInlineConstants() const {
1224+
return getGeneration() == GFX12;
1225+
}
1226+
12211227
/// \returns The maximum number of instructions that can be enclosed in an
12221228
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
12231229
/// instruction.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5804,6 +5804,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
58045804
return false;
58055805
}
58065806
}
5807+
} else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
5808+
isF16PseudoScalarTrans(MI.getOpcode()) &&
5809+
isInlineConstant(*MO, OpInfo)) {
5810+
return false;
58075811
}
58085812

58095813
if (MO->isReg()) {

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
918918
return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
919919
}
920920

921+
static bool isF16PseudoScalarTrans(unsigned Opcode) {
922+
return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
923+
Opcode == AMDGPU::V_S_LOG_F16_e64 ||
924+
Opcode == AMDGPU::V_S_RCP_F16_e64 ||
925+
Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
926+
Opcode == AMDGPU::V_S_SQRT_F16_e64;
927+
}
928+
921929
static bool doesNotReadTiedSource(const MachineInstr &MI) {
922930
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
923931
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
3+
4+
# Do not use inline constants for f16 pseudo scalar transcendentals.
5+
# But allow literal constants.
6+
7+
---
8+
name: exp_f16_imm
9+
tracksRegLiveness: true
10+
body: |
11+
bb.0:
12+
; GCN-LABEL: name: exp_f16_imm
13+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
14+
; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
15+
%0:sgpr_32 = S_MOV_B32 15360
16+
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
17+
...
18+
19+
---
20+
name: exp_f16_literal
21+
tracksRegLiveness: true
22+
body: |
23+
bb.0:
24+
; GCN-LABEL: name: exp_f16_literal
25+
; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
26+
%0:sgpr_32 = S_MOV_B32 16960
27+
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
28+
...
29+
30+
---
31+
name: log_f16_imm
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
; GCN-LABEL: name: log_f16_imm
36+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
37+
; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
38+
%0:sgpr_32 = S_MOV_B32 15360
39+
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
40+
...
41+
42+
---
43+
name: log_f16_literal
44+
tracksRegLiveness: true
45+
body: |
46+
bb.0:
47+
; GCN-LABEL: name: log_f16_literal
48+
; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
49+
%0:sgpr_32 = S_MOV_B32 16960
50+
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
51+
...
52+
53+
---
54+
name: rcp_f16_imm
55+
tracksRegLiveness: true
56+
body: |
57+
bb.0:
58+
; GCN-LABEL: name: rcp_f16_imm
59+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
60+
; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
61+
%0:sgpr_32 = S_MOV_B32 15360
62+
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
63+
...
64+
65+
---
66+
name: rcp_f16_literal
67+
tracksRegLiveness: true
68+
body: |
69+
bb.0:
70+
; GCN-LABEL: name: rcp_f16_literal
71+
; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
72+
%0:sgpr_32 = S_MOV_B32 16960
73+
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
74+
...
75+
76+
---
77+
name: rsq_f16_imm
78+
tracksRegLiveness: true
79+
body: |
80+
bb.0:
81+
; GCN-LABEL: name: rsq_f16_imm
82+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
83+
; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
84+
%0:sgpr_32 = S_MOV_B32 15360
85+
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
86+
...
87+
88+
---
89+
name: rsq_f16_literal
90+
tracksRegLiveness: true
91+
body: |
92+
bb.0:
93+
; GCN-LABEL: name: rsq_f16_literal
94+
; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
95+
%0:sgpr_32 = S_MOV_B32 16960
96+
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
97+
...
98+
99+
---
100+
name: sqrt_f16_imm
101+
tracksRegLiveness: true
102+
body: |
103+
bb.0:
104+
; GCN-LABEL: name: sqrt_f16_imm
105+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
106+
; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
107+
%0:sgpr_32 = S_MOV_B32 15360
108+
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
109+
...
110+
111+
---
112+
name: sqrt_f16_literal
113+
tracksRegLiveness: true
114+
body: |
115+
bb.0:
116+
; GCN-LABEL: name: sqrt_f16_literal
117+
; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
118+
%0:sgpr_32 = S_MOV_B32 16960
119+
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
120+
...

0 commit comments

Comments
 (0)