@@ -80,6 +80,10 @@ class SIFoldOperands : public MachineFunctionPass {
80
80
81
81
bool updateOperand (FoldCandidate &Fold) const ;
82
82
83
+ bool canUseImmWithOpSel (FoldCandidate &Fold) const ;
84
+
85
+ bool tryFoldImmWithOpSel (FoldCandidate &Fold) const ;
86
+
83
87
bool tryAddToFoldList (SmallVectorImpl<FoldCandidate> &FoldList,
84
88
MachineInstr *MI, unsigned OpNo,
85
89
MachineOperand *OpToFold) const ;
@@ -196,62 +200,85 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
196
200
return new SIFoldOperands ();
197
201
}
198
202
199
- bool SIFoldOperands::updateOperand (FoldCandidate &Fold) const {
203
+ bool SIFoldOperands::canUseImmWithOpSel (FoldCandidate &Fold) const {
200
204
MachineInstr *MI = Fold.UseMI ;
201
205
MachineOperand &Old = MI->getOperand (Fold.UseOpNo );
202
- assert (Old. isReg ()) ;
206
+ const uint64_t TSFlags = MI-> getDesc (). TSFlags ;
203
207
208
+ assert (Old.isReg () && Fold.isImm ());
204
209
205
- const uint64_t TSFlags = MI->getDesc ().TSFlags ;
206
- if (Fold.isImm ()) {
207
- if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
208
- AMDGPU::isFoldableLiteralV216 (Fold.ImmToFold ,
209
- ST->hasInv2PiInlineImm ())) {
210
- if (ST->hasDOTOpSelHazard () && (TSFlags & SIInstrFlags::IsDOT))
211
- return false ; // Prevent further folding of this operand without opsel.
212
-
213
- // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
214
- // already set.
215
- unsigned Opcode = MI->getOpcode ();
216
- int OpNo = MI->getOperandNo (&Old);
217
- int ModIdx = -1 ;
218
- if (OpNo == AMDGPU::getNamedOperandIdx (Opcode, AMDGPU::OpName::src0))
219
- ModIdx = AMDGPU::OpName::src0_modifiers;
220
- else if (OpNo == AMDGPU::getNamedOperandIdx (Opcode, AMDGPU::OpName::src1))
221
- ModIdx = AMDGPU::OpName::src1_modifiers;
222
- else if (OpNo == AMDGPU::getNamedOperandIdx (Opcode, AMDGPU::OpName::src2))
223
- ModIdx = AMDGPU::OpName::src2_modifiers;
224
- assert (ModIdx != -1 );
225
- ModIdx = AMDGPU::getNamedOperandIdx (Opcode, ModIdx);
226
- MachineOperand &Mod = MI->getOperand (ModIdx);
227
- unsigned Val = Mod.getImm ();
228
- if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
229
- // Only apply the following transformation if that operand requires
230
- // a packed immediate.
231
- switch (TII->get (Opcode).operands ()[OpNo].OperandType ) {
232
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
233
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
234
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
235
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
236
- // If upper part is all zero we do not need op_sel_hi.
237
- if (!isUInt<16 >(Fold.ImmToFold )) {
238
- if (!(Fold.ImmToFold & 0xffff )) {
239
- Mod.setImm (Mod.getImm () | SISrcMods::OP_SEL_0);
240
- Mod.setImm (Mod.getImm () & ~SISrcMods::OP_SEL_1);
241
- Old.ChangeToImmediate ((Fold.ImmToFold >> 16 ) & 0xffff );
242
- return true ;
243
- }
244
- Mod.setImm (Mod.getImm () & ~SISrcMods::OP_SEL_1);
245
- Old.ChangeToImmediate (Fold.ImmToFold & 0xffff );
246
- return true ;
247
- }
248
- break ;
249
- default :
250
- break ;
251
- }
252
- }
253
- }
210
+ if (!(TSFlags & SIInstrFlags::IsPacked) || (TSFlags & SIInstrFlags::IsMAI) ||
211
+ (ST->hasDOTOpSelHazard () && (TSFlags & SIInstrFlags::IsDOT)) ||
212
+ isUInt<16 >(Fold.ImmToFold ) ||
213
+ !AMDGPU::isFoldableLiteralV216 (Fold.ImmToFold , ST->hasInv2PiInlineImm ()))
214
+ return false ;
215
+
216
+ unsigned Opcode = MI->getOpcode ();
217
+ int OpNo = MI->getOperandNo (&Old);
218
+ uint8_t OpType = TII->get (Opcode).operands ()[OpNo].OperandType ;
219
+ switch (OpType) {
220
+ default :
221
+ return false ;
222
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
223
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
224
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
225
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
226
+ break ;
227
+ }
228
+
229
+ return true ;
230
+ }
231
+
232
+ bool SIFoldOperands::tryFoldImmWithOpSel (FoldCandidate &Fold) const {
233
+ MachineInstr *MI = Fold.UseMI ;
234
+ MachineOperand &Old = MI->getOperand (Fold.UseOpNo );
235
+ unsigned Opcode = MI->getOpcode ();
236
+ int OpNo = MI->getOperandNo (&Old);
237
+
238
+ // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
239
+ // already set.
240
+ int ModIdx = -1 ;
241
+ if (OpNo == AMDGPU::getNamedOperandIdx (Opcode, AMDGPU::OpName::src0))
242
+ ModIdx = AMDGPU::OpName::src0_modifiers;
243
+ else if (OpNo == AMDGPU::getNamedOperandIdx (Opcode, AMDGPU::OpName::src1))
244
+ ModIdx = AMDGPU::OpName::src1_modifiers;
245
+ else if (OpNo == AMDGPU::getNamedOperandIdx (Opcode, AMDGPU::OpName::src2))
246
+ ModIdx = AMDGPU::OpName::src2_modifiers;
247
+ assert (ModIdx != -1 );
248
+ ModIdx = AMDGPU::getNamedOperandIdx (Opcode, ModIdx);
249
+ MachineOperand &Mod = MI->getOperand (ModIdx);
250
+ unsigned Val = Mod.getImm ();
251
+ if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
252
+ return false ;
253
+
254
+ // Only apply the following transformation if that operand requires
255
+ // a packed immediate.
256
+ // If upper part is all zero we do not need op_sel_hi.
257
+ if (!(Fold.ImmToFold & 0xffff )) {
258
+ MachineOperand New =
259
+ MachineOperand::CreateImm ((Fold.ImmToFold >> 16 ) & 0xffff );
260
+ if (!TII->isOperandLegal (*MI, OpNo, &New))
261
+ return false ;
262
+ Mod.setImm (Mod.getImm () | SISrcMods::OP_SEL_0);
263
+ Mod.setImm (Mod.getImm () & ~SISrcMods::OP_SEL_1);
264
+ Old.ChangeToImmediate ((Fold.ImmToFold >> 16 ) & 0xffff );
265
+ return true ;
254
266
}
267
+ MachineOperand New = MachineOperand::CreateImm (Fold.ImmToFold & 0xffff );
268
+ if (!TII->isOperandLegal (*MI, OpNo, &New))
269
+ return false ;
270
+ Mod.setImm (Mod.getImm () & ~SISrcMods::OP_SEL_1);
271
+ Old.ChangeToImmediate (Fold.ImmToFold & 0xffff );
272
+ return true ;
273
+ }
274
+
275
+ bool SIFoldOperands::updateOperand (FoldCandidate &Fold) const {
276
+ MachineInstr *MI = Fold.UseMI ;
277
+ MachineOperand &Old = MI->getOperand (Fold.UseOpNo );
278
+ assert (Old.isReg ());
279
+
280
+ if (Fold.isImm () && canUseImmWithOpSel (Fold))
281
+ return tryFoldImmWithOpSel (Fold);
255
282
256
283
if ((Fold.isImm () || Fold.isFI () || Fold.isGlobal ()) && Fold.needsShrink ()) {
257
284
MachineBasicBlock *MBB = MI->getParent ();
@@ -383,7 +410,13 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
383
410
return false ;
384
411
};
385
412
386
- if (!TII->isOperandLegal (*MI, OpNo, OpToFold)) {
413
+ bool IsLegal = TII->isOperandLegal (*MI, OpNo, OpToFold);
414
+ if (!IsLegal && OpToFold->isImm ()) {
415
+ FoldCandidate Fold (MI, OpNo, OpToFold);
416
+ IsLegal = canUseImmWithOpSel (Fold);
417
+ }
418
+
419
+ if (!IsLegal) {
387
420
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
388
421
unsigned NewOpc = macToMad (Opc);
389
422
if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
0 commit comments