Skip to content

Commit bea56b0

Browse files
committed
[AMDGPU] Have a subtarget feature to control use of real True16 instructions.
Real True16 instructions are as they are defined in the ISA. Fake True16 instructions are identical to real ones except that they take 32-bit registers as operands and always use their low halves. Reviewed By: Joe_Nash Differential Revision: https://reviews.llvm.org/D156100
1 parent 2388222 commit bea56b0

File tree

4 files changed

+27
-2
lines changed

4 files changed

+27
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,12 @@ def FeatureTrue16BitInsts : SubtargetFeature<"true16",
375375
"True 16-bit operand instructions"
376376
>;
377377

378+
// Subtarget feature "real-true16": requests real True16 instructions, i.e.
// ones that operate on true 16-bit registers. It is tied to the
// EnableRealTrue16Insts subtarget field.
def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
379+
"EnableRealTrue16Insts",
380+
"true",
381+
"Use true 16-bit registers"
382+
>;
383+
378384
def FeatureVOP3P : SubtargetFeature<"vop3p",
379385
"HasVOP3PInsts",
380386
"true",

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,10 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
167167

168168
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}
169169

170+
/// Real True16 instructions are used only when both conditions hold: the
/// subtarget has True16 instructions (hasTrue16BitInsts()) and the
/// EnableRealTrue16Insts flag is set.
bool AMDGPUSubtarget::useRealTrue16Insts() const {
171+
return hasTrue16BitInsts() && EnableRealTrue16Insts;
172+
}
173+
170174
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
171175
const GCNTargetMachine &TM)
172176
: // clang-format off

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class AMDGPUSubtarget {
4949
bool GCN3Encoding = false;
5050
bool Has16BitInsts = false;
5151
bool HasTrue16BitInsts = false;
52+
bool EnableRealTrue16Insts = false;
5253
bool HasMadMixInsts = false;
5354
bool HasMadMacF32Insts = false;
5455
bool HasDsSrc2Insts = false;
@@ -153,8 +154,17 @@ class AMDGPUSubtarget {
153154
return Has16BitInsts;
154155
}
155156

157+
/// Return true if the subtarget supports True16 instructions.
156158
bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
157159

160+
/// Return true if real (non-fake) variants of True16 instructions using
161+
/// 16-bit registers should be code-generated. Fake True16 instructions are
162+
/// identical to non-fake ones except that they take 32-bit registers as
163+
/// operands and always use their low halves.
164+
// TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
165+
// supported and the support for fake True16 instructions is removed.
166+
bool useRealTrue16Insts() const;
167+
158168
bool hasMadMixInsts() const {
159169
return HasMadMixInsts;
160170
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
148148
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
149149

150150
if (Subtarget->has16BitInsts()) {
151-
addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
152-
addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
151+
if (Subtarget->useRealTrue16Insts()) {
152+
addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass);
153+
addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass);
154+
} else {
155+
addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
156+
addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
157+
}
153158

154159
// Unless there are also VOP3P operations, no operations are really legal.
155160
addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);

0 commit comments

Comments
 (0)