@@ -8104,7 +8104,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
8104
8104
MOs.append (LoadMI.operands_begin () + NumOps - X86::AddrNumOperands, \
8105
8105
LoadMI.operands_begin () + NumOps); \
8106
8106
return foldMemoryBroadcast (MF, MI, Ops[0 ], MOs, InsertPt, /* Size=*/ SIZE, \
8107
- Alignment, /* AllowCommute=*/ true );
8107
+ /* AllowCommute=*/ true );
8108
8108
case X86::VPBROADCASTWZ128rm:
8109
8109
case X86::VPBROADCASTWZ256rm:
8110
8110
case X86::VPBROADCASTWZrm:
@@ -8136,75 +8136,34 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
8136
8136
/* Size=*/ 0 , Alignment, /* AllowCommute=*/ true );
8137
8137
}
8138
8138
8139
- MachineInstr *X86InstrInfo::foldMemoryBroadcast (
8140
- MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
8141
- ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
8142
- unsigned BitsSize, Align Alignment, bool AllowCommute) const {
8139
+ MachineInstr *
8140
+ X86InstrInfo::foldMemoryBroadcast (MachineFunction &MF, MachineInstr &MI,
8141
+ unsigned OpNum, ArrayRef<MachineOperand> MOs,
8142
+ MachineBasicBlock::iterator InsertPt,
8143
+ unsigned BitsSize, bool AllowCommute) const {
8143
8144
8144
8145
if (auto *I = lookupBroadcastFoldTable (MI.getOpcode (), OpNum))
8145
8146
return matchBroadcastSize (*I, BitsSize)
8146
8147
? FuseInst (MF, I->DstOp , OpNum, MOs, InsertPt, MI, *this )
8147
8148
: nullptr ;
8148
8149
8149
- // TODO: Share code with foldMemoryOperandImpl for the commute
8150
8150
if (AllowCommute) {
8151
- unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
8152
- if (findCommutedOpIndices (MI, CommuteOpIdx1, CommuteOpIdx2)) {
8153
- bool HasDef = MI.getDesc ().getNumDefs ();
8154
- Register Reg0 = HasDef ? MI.getOperand (0 ).getReg () : Register ();
8155
- Register Reg1 = MI.getOperand (CommuteOpIdx1).getReg ();
8156
- Register Reg2 = MI.getOperand (CommuteOpIdx2).getReg ();
8157
- bool Tied1 =
8158
- 0 == MI.getDesc ().getOperandConstraint (CommuteOpIdx1, MCOI::TIED_TO);
8159
- bool Tied2 =
8160
- 0 == MI.getDesc ().getOperandConstraint (CommuteOpIdx2, MCOI::TIED_TO);
8161
-
8162
- // If either of the commutable operands are tied to the destination
8163
- // then we can not commute + fold.
8164
- if ((HasDef && Reg0 == Reg1 && Tied1) ||
8165
- (HasDef && Reg0 == Reg2 && Tied2))
8166
- return nullptr ;
8167
-
8168
- MachineInstr *CommutedMI =
8169
- commuteInstruction (MI, false , CommuteOpIdx1, CommuteOpIdx2);
8170
- if (!CommutedMI) {
8171
- // Unable to commute.
8172
- return nullptr ;
8173
- }
8174
- if (CommutedMI != &MI) {
8175
- // New instruction. We can't fold from this.
8176
- CommutedMI->eraseFromParent ();
8177
- return nullptr ;
8178
- }
8179
-
8180
- // Attempt to fold with the commuted version of the instruction.
8181
- MachineInstr *NewMI = foldMemoryBroadcast (MF, MI, CommuteOpIdx2, MOs,
8182
- InsertPt, BitsSize, Alignment,
8183
- /* AllowCommute=*/ false );
8184
- if (NewMI)
8185
- return NewMI;
8186
-
8187
- // Folding failed again - undo the commute before returning.
8188
- MachineInstr *UncommutedMI =
8189
- commuteInstruction (MI, false , CommuteOpIdx1, CommuteOpIdx2);
8190
- if (!UncommutedMI) {
8191
- // Unable to commute.
8192
- return nullptr ;
8193
- }
8194
- if (UncommutedMI != &MI) {
8195
- // New instruction. It doesn't need to be kept.
8196
- UncommutedMI->eraseFromParent ();
8197
- return nullptr ;
8198
- }
8199
-
8200
- // Return here to prevent duplicate fuse failure report.
8151
+ // If the instruction and target operand are commutable, commute the
8152
+ // instruction and try again.
8153
+ unsigned CommuteOpIdx2 = commuteOperandsForFold (MI, OpNum);
8154
+ if (CommuteOpIdx2 == OpNum) {
8155
+ printFailMsgforFold (MI, OpNum);
8201
8156
return nullptr ;
8202
8157
}
8158
+ MachineInstr *NewMI =
8159
+ foldMemoryBroadcast (MF, MI, CommuteOpIdx2, MOs, InsertPt, BitsSize,
8160
+ /* AllowCommute=*/ false );
8161
+ if (NewMI)
8162
+ return NewMI;
8163
+ UndoCommuteForFold (MI, OpNum, CommuteOpIdx2);
8203
8164
}
8204
8165
8205
- // No fusion
8206
- if (PrintFailedFusing && !MI.isCopy ())
8207
- dbgs () << " We failed to fuse operand " << OpNum << " in " << MI;
8166
+ printFailMsgforFold (MI, OpNum);
8208
8167
return nullptr ;
8209
8168
}
8210
8169
0 commit comments