@@ -110,33 +110,42 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
   llvm_unreachable("Should never be called!");
 }
 
-/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
-/// the modified arguments.
+/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates an intrinsic call with
+/// the modified arguments (based on OldIntr), and replaces InstToReplace with
+/// this newly created intrinsic call.
 static Optional<Instruction *> modifyIntrinsicCall(
-    IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+    IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
+    InstCombiner &IC,
     std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
         Func) {
   SmallVector<Type *, 4> ArgTys;
-  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
     return None;
 
-  SmallVector<Value *, 8> Args(II.args());
+  SmallVector<Value *, 8> Args(OldIntr.args());
 
   // Modify arguments and types
   Func(Args, ArgTys);
 
-  Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+  Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
 
   CallInst *NewCall = IC.Builder.CreateCall(I, Args);
-  NewCall->takeName(&II);
-  NewCall->copyMetadata(II);
+  NewCall->takeName(&OldIntr);
+  NewCall->copyMetadata(OldIntr);
   if (isa<FPMathOperator>(NewCall))
-    NewCall->copyFastMathFlags(&II);
+    NewCall->copyFastMathFlags(&OldIntr);
 
   // Erase and replace uses
-  if (!II.getType()->isVoidTy())
-    IC.replaceInstUsesWith(II, NewCall);
-  return IC.eraseInstFromFunction(II);
+  if (!InstToReplace.getType()->isVoidTy())
+    IC.replaceInstUsesWith(InstToReplace, NewCall);
+
+  bool RemoveOldIntr = &OldIntr != &InstToReplace;
+
+  auto RetValue = IC.eraseInstFromFunction(InstToReplace);
+  if (RemoveOldIntr)
+    IC.eraseInstFromFunction(OldIntr);
+
+  return RetValue;
 }
 
 static Optional<Instruction *>
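
The split into OldIntr and InstToReplace supports two call shapes, both visible in the hunks below. A minimal side-by-side sketch (argument lists copied from the patch itself; nothing here is new API):

    // Plain rewrite: pass the intrinsic twice, so the new call replaces and
    // erases only the original intrinsic (RemoveOldIntr is false).
    return modifyIntrinsicCall(
        II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
          Args.erase(Args.begin() + ImageDimIntr->LodIndex);
        });

    // Fold into a user: the fptrunc user is replaced by the new D16 call and
    // erased first; the original intrinsic, now dead, is erased afterwards.
    return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
                               [&](auto &Args, auto &ArgTys) {
                                 ArgTys[0] = User->getType();
                               });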
@@ -153,7 +162,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
             AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                      ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->LodIndex);
             });
       }
@@ -170,7 +179,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
             AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                      ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->MipIndex);
             });
       }
@@ -187,7 +196,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
             AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                      ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
               ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
             });
@@ -205,13 +214,41 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
             AMDGPU::getImageDimIntrinsicByBaseOpcode(
                 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
             });
       }
     }
   }
 
+  // Try to use D16
+  if (ST->hasD16Images()) {
+
+    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+        AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+
+    if (BaseOpcode->HasD16) {
+
+      // If the only use of the image intrinsic is a fptrunc (with conversion
+      // to half), then both the fptrunc and the image intrinsic are replaced
+      // with an image intrinsic carrying the D16 flag.
+      if (II.hasOneUse()) {
+        Instruction *User = II.user_back();
+
+        if (User->getOpcode() == Instruction::FPTrunc &&
+            User->getType()->getScalarType()->isHalfTy()) {
+
+          return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
+                                     [&](auto &Args, auto &ArgTys) {
+                                       // Change the return type of the image
+                                       // intrinsic to the fptrunc's type.
+                                       ArgTys[0] = User->getType();
+                                     });
+        }
+      }
+    }
+  }
+
   // Try to use A16 or G16
   if (!ST->hasA16() && !ST->hasG16())
     return None;
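
To make the D16 fold concrete, here is a sketch of the intended IR rewrite, modeled on a basic 2D sample; the value names, the dmask (i32 15), and the trailing flag operands are illustrative placeholders:

    ; Before: the sample's only use is a fptrunc to half.
    %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(
                i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp,
                i1 false, i32 0, i32 0)
    %res = fptrunc <4 x float> %data to <4 x half>

    ; After: one D16 sample returns half directly; the fptrunc is gone. Only
    ; the return-type overload changes, matching ArgTys[0] = User->getType().
    %res = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(
               i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp,
               i1 false, i32 0, i32 0)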
@@ -263,7 +300,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                            : Type::getInt16Ty(II.getContext());
 
   return modifyIntrinsicCall(
-      II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+      II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
         ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
         if (!OnlyDerivatives) {
           ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
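
For comparison, the A16 path that this last hunk adjusts (only the call shape changes in this patch) narrows coordinates that are merely extended halves; again a sketch with illustrative operands:

    ; Before: 32-bit coordinates that are fpext'ed halves.
    %s32 = fpext half %s to float
    %t32 = fpext half %t to float
    %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(
                i32 15, float %s32, float %t32, <8 x i32> %rsrc,
                <4 x i32> %samp, i1 false, i32 0, i32 0)

    ; After: the coordinate overload is narrowed to half (A16).
    %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(
                i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp,
                i1 false, i32 0, i32 0)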