Skip to content

Commit 2417de2

Browse files
[AMDGPU] Use d16 flag for image.sample instructions
The image.sample instruction can be forced to return a half type instead of float when the d16 flag is enabled. This patch adds a new pattern in InstCombine that detects when the output of image.sample is used only by an fptrunc that converts the type from float to half. If the pattern is detected, the fptrunc and the image.sample are combined into a single image.sample that returns the half type. Later, during lowering, the d16 flag is added to the image sample intrinsic. Differential Revision: https://reviews.llvm.org/D124232
1 parent 2407c13 commit 2417de2

File tree

2 files changed

+457
-17
lines changed

2 files changed

+457
-17
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -110,33 +110,42 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
110110
llvm_unreachable("Should never be called!");
111111
}
112112

113-
/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
114-
/// the modified arguments.
113+
/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
114+
/// modified arguments (based on OldIntr) and replaces InstToReplace with
115+
/// this newly created intrinsic call.
115116
static Optional<Instruction *> modifyIntrinsicCall(
116-
IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
117+
IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
118+
InstCombiner &IC,
117119
std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
118120
Func) {
119121
SmallVector<Type *, 4> ArgTys;
120-
if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
122+
if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
121123
return None;
122124

123-
SmallVector<Value *, 8> Args(II.args());
125+
SmallVector<Value *, 8> Args(OldIntr.args());
124126

125127
// Modify arguments and types
126128
Func(Args, ArgTys);
127129

128-
Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
130+
Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
129131

130132
CallInst *NewCall = IC.Builder.CreateCall(I, Args);
131-
NewCall->takeName(&II);
132-
NewCall->copyMetadata(II);
133+
NewCall->takeName(&OldIntr);
134+
NewCall->copyMetadata(OldIntr);
133135
if (isa<FPMathOperator>(NewCall))
134-
NewCall->copyFastMathFlags(&II);
136+
NewCall->copyFastMathFlags(&OldIntr);
135137

136138
// Erase and replace uses
137-
if (!II.getType()->isVoidTy())
138-
IC.replaceInstUsesWith(II, NewCall);
139-
return IC.eraseInstFromFunction(II);
139+
if (!InstToReplace.getType()->isVoidTy())
140+
IC.replaceInstUsesWith(InstToReplace, NewCall);
141+
142+
bool RemoveOldIntr = &OldIntr != &InstToReplace;
143+
144+
auto RetValue = IC.eraseInstFromFunction(InstToReplace);
145+
if (RemoveOldIntr)
146+
IC.eraseInstFromFunction(OldIntr);
147+
148+
return RetValue;
140149
}
141150

142151
static Optional<Instruction *>
@@ -153,7 +162,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
153162
AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
154163
ImageDimIntr->Dim);
155164
return modifyIntrinsicCall(
156-
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
165+
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
157166
Args.erase(Args.begin() + ImageDimIntr->LodIndex);
158167
});
159168
}
@@ -170,7 +179,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
170179
AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
171180
ImageDimIntr->Dim);
172181
return modifyIntrinsicCall(
173-
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
182+
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
174183
Args.erase(Args.begin() + ImageDimIntr->MipIndex);
175184
});
176185
}
@@ -187,7 +196,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
187196
AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
188197
ImageDimIntr->Dim);
189198
return modifyIntrinsicCall(
190-
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
199+
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
191200
Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
192201
ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
193202
});
@@ -205,13 +214,41 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
205214
AMDGPU::getImageDimIntrinsicByBaseOpcode(
206215
OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
207216
return modifyIntrinsicCall(
208-
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
217+
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
209218
Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
210219
});
211220
}
212221
}
213222
}
214223

224+
// Try to use D16
225+
if (ST->hasD16Images()) {
226+
227+
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
228+
AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
229+
230+
if (BaseOpcode->HasD16) {
231+
232+
// If the only use of image intrinsic is a fptrunc (with conversion to
233+
// half) then both fptrunc and image intrinsic will be replaced with image
234+
// intrinsic with D16 flag.
235+
if (II.hasOneUse()) {
236+
Instruction *User = II.user_back();
237+
238+
if (User->getOpcode() == Instruction::FPTrunc &&
239+
User->getType()->getScalarType()->isHalfTy()) {
240+
241+
return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
242+
[&](auto &Args, auto &ArgTys) {
243+
// Change return type of image intrinsic.
244+
// Set it to return type of fptrunc.
245+
ArgTys[0] = User->getType();
246+
});
247+
}
248+
}
249+
}
250+
}
251+
215252
// Try to use A16 or G16
216253
if (!ST->hasA16() && !ST->hasG16())
217254
return None;
@@ -263,7 +300,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
263300
: Type::getInt16Ty(II.getContext());
264301

265302
return modifyIntrinsicCall(
266-
II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
303+
II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
267304
ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
268305
if (!OnlyDerivatives) {
269306
ArgTys[ImageDimIntr->CoordTyArg] = CoordType;

0 commit comments

Comments
 (0)