@@ -66,6 +66,10 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
66
66
int Val,
67
67
ARMCC::CondCodes Pred, unsigned PredReg,
68
68
unsigned MIFlags) const {
69
+ assert ((isARMLowRegister (DestReg) ||
70
+ isVirtualRegister (DestReg)) &&
71
+ " Thumb1 does not have ldr to high register" );
72
+
69
73
MachineFunction &MF = *MBB.getParent ();
70
74
const TargetInstrInfo &TII = *MF.getSubtarget ().getInstrInfo ();
71
75
MachineConstantPool *ConstantPool = MF.getConstantPool ();
@@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
106
110
NumBytes = -NumBytes;
107
111
}
108
112
unsigned LdReg = DestReg;
109
- if (DestReg == ARM::SP) {
113
+ if (DestReg == ARM::SP)
110
114
assert (BaseReg == ARM::SP && " Unexpected!" );
115
+ if (!isARMLowRegister (DestReg) && !MRI.isVirtualRegister (DestReg))
111
116
LdReg = MF.getRegInfo ().createVirtualRegister (&ARM::tGPRRegClass);
112
- }
113
117
114
- if (NumBytes <= 255 && NumBytes >= 0 )
118
+ if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
115
119
AddDefaultT1CC (BuildMI (MBB, MBBI, dl, TII.get (ARM::tMOVi8), LdReg))
116
120
.addImm (NumBytes).setMIFlags (MIFlags);
117
- else if (NumBytes < 0 && NumBytes >= -255 ) {
121
+ } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC ) {
118
122
AddDefaultT1CC (BuildMI (MBB, MBBI, dl, TII.get (ARM::tMOVi8), LdReg))
119
123
.addImm (NumBytes).setMIFlags (MIFlags);
120
124
AddDefaultT1CC (BuildMI (MBB, MBBI, dl, TII.get (ARM::tRSB), LdReg))
@@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
124
128
ARMCC::AL, 0 , MIFlags);
125
129
126
130
// Emit add / sub.
127
- int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
131
+ int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
132
+ : ARM::tADDrr);
128
133
MachineInstrBuilder MIB =
129
134
BuildMI (MBB, MBBI, dl, TII.get (Opc), DestReg);
130
135
if (Opc != ARM::tADDhirr)
@@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
136
141
AddDefaultPred (MIB);
137
142
}
138
143
139
- // / calcNumMI - Returns the number of instructions required to materialize
140
- // / the specific add / sub r, c instruction.
141
- static unsigned calcNumMI (int Opc, int ExtraOpc, unsigned Bytes,
142
- unsigned NumBits, unsigned Scale) {
143
- unsigned NumMIs = 0 ;
144
- unsigned Chunk = ((1 << NumBits) - 1 ) * Scale;
145
-
146
- if (Opc == ARM::tADDrSPi) {
147
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
148
- Bytes -= ThisVal;
149
- NumMIs++;
150
- NumBits = 8 ;
151
- Scale = 1 ; // Followed by a number of tADDi8.
152
- Chunk = ((1 << NumBits) - 1 ) * Scale;
153
- }
154
-
155
- NumMIs += Bytes / Chunk;
156
- if ((Bytes % Chunk) != 0 )
157
- NumMIs++;
158
- if (ExtraOpc)
159
- NumMIs++;
160
- return NumMIs;
161
- }
162
-
163
144
// / emitThumbRegPlusImmediate - Emits a series of instructions to materialize
164
- // / a destreg = basereg + immediate in Thumb code.
145
+ // / a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
146
+ // / SUBs first, and uses a constant pool value if the instruction sequence would
147
+ // / be too long. This is allowed to modify the condition flags.
165
148
void llvm::emitThumbRegPlusImmediate (MachineBasicBlock &MBB,
166
149
MachineBasicBlock::iterator &MBBI,
167
150
DebugLoc dl,
@@ -172,131 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
172
155
bool isSub = NumBytes < 0 ;
173
156
unsigned Bytes = (unsigned )NumBytes;
174
157
if (isSub) Bytes = -NumBytes;
175
- bool isMul4 = (Bytes & 3 ) == 0 ;
176
- bool isTwoAddr = false ;
177
- bool DstNotEqBase = false ;
178
- unsigned NumBits = 1 ;
179
- unsigned Scale = 1 ;
180
- int Opc = 0 ;
158
+
159
+ int CopyOpc = 0 ;
160
+ unsigned CopyBits = 0 ;
161
+ unsigned CopyScale = 1 ;
162
+ bool CopyNeedsCC = false ;
181
163
int ExtraOpc = 0 ;
182
- bool NeedCC = false ;
183
-
184
- if (DestReg == BaseReg && BaseReg == ARM::SP) {
185
- assert (isMul4 && " Thumb sp inc / dec size must be multiple of 4!" );
186
- NumBits = 7 ;
187
- Scale = 4 ;
188
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
189
- isTwoAddr = true ;
190
- } else if (!isSub && BaseReg == ARM::SP) {
191
- // r1 = add sp, 403
192
- // =>
193
- // r1 = add sp, 100 * 4
194
- // r1 = add r1, 3
195
- if (!isMul4) {
196
- Bytes &= ~3 ;
197
- ExtraOpc = ARM::tADDi3;
164
+ unsigned ExtraBits = 0 ;
165
+ unsigned ExtraScale = 1 ;
166
+ bool ExtraNeedsCC = false ;
167
+
168
+ // Strategy:
169
+ // We need to select two types of instruction, maximizing the available
170
+ // immediate range of each. The instructions we use will depend on whether
171
+ // DestReg and BaseReg are low, high or the stack pointer.
172
+ // * CopyOpc - DestReg = BaseReg + imm
173
+ // This will be emitted once if DestReg != BaseReg, and never if
174
+ // DestReg == BaseReg.
175
+ // * ExtraOpc - DestReg = DestReg + imm
176
+ // This will be emitted as many times as necessary to add the
177
+ // full immediate.
178
+ // If the immediate ranges of these instructions are not large enough to cover
179
+ // NumBytes with a reasonable number of instructions, we fall back to using a
180
+ // value loaded from a constant pool.
181
+ if (DestReg == ARM::SP) {
182
+ if (BaseReg == ARM::SP) {
183
+ // sp -> sp
184
+ // Already in right reg, no copy needed
185
+ } else {
186
+ // low -> sp or high -> sp
187
+ CopyOpc = ARM::tMOVr;
188
+ CopyBits = 0 ;
198
189
}
199
- DstNotEqBase = true ;
200
- NumBits = 8 ;
201
- Scale = 4 ;
202
- Opc = ARM::tADDrSPi;
203
- } else {
204
- // sp = sub sp, c
205
- // r1 = sub sp, c
206
- // r8 = sub sp, c
207
- if (DestReg != BaseReg)
208
- DstNotEqBase = true ;
209
- if (DestReg == ARM::SP) {
210
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
211
- assert (isMul4 && " Thumb sp inc / dec size must be multiple of 4!" );
212
- NumBits = 7 ;
213
- Scale = 4 ;
190
+ ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
191
+ ExtraBits = 7 ;
192
+ ExtraScale = 4 ;
193
+ } else if (isARMLowRegister (DestReg)) {
194
+ if (BaseReg == ARM::SP) {
195
+ // sp -> low
196
+ assert (!isSub && " Thumb1 does not have tSUBrSPi" );
197
+ CopyOpc = ARM::tADDrSPi;
198
+ CopyBits = 8 ;
199
+ CopyScale = 4 ;
200
+ } else if (DestReg == BaseReg) {
201
+ // low -> same low
202
+ // Already in right reg, no copy needed
203
+ } else if (isARMLowRegister (BaseReg)) {
204
+ // low -> different low
205
+ CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
206
+ CopyBits = 3 ;
207
+ CopyNeedsCC = true ;
208
+ } else {
209
+ // high -> low
210
+ CopyOpc = ARM::tMOVr;
211
+ CopyBits = 0 ;
212
+ }
213
+ ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
214
+ ExtraBits = 8 ;
215
+ ExtraNeedsCC = true ;
216
+ } else /* DestReg is high */ {
217
+ if (DestReg == BaseReg) {
218
+ // high -> same high
219
+ // Already in right reg, no copy needed
214
220
} else {
215
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
216
- NumBits = 8 ;
217
- NeedCC = true ;
221
+ // {low,high,sp} -> high
222
+ CopyOpc = ARM::tMOVr ;
223
+ CopyBits = 0 ;
218
224
}
219
- isTwoAddr = true ;
225
+ ExtraOpc = 0 ;
220
226
}
221
227
222
- unsigned NumMIs = calcNumMI (Opc, ExtraOpc, Bytes, NumBits, Scale);
228
+ // We could handle an unaligned immediate with an unaligned copy instruction
229
+ // and an aligned extra instruction, but this case is not currently needed.
230
+ assert (((Bytes & 3 ) == 0 || ExtraScale == 1 ) &&
231
+ " Unaligned offset, but all instructions require alignment" );
232
+
233
+ unsigned CopyRange = ((1 << CopyBits) - 1 ) * CopyScale;
234
+ // If we would emit the copy with an immediate of 0, just use tMOVr.
235
+ if (CopyOpc && Bytes < CopyScale) {
236
+ CopyOpc = ARM::tMOVr;
237
+ CopyBits = 0 ;
238
+ CopyScale = 1 ;
239
+ CopyNeedsCC = false ;
240
+ CopyRange = 0 ;
241
+ }
242
+ unsigned ExtraRange = ((1 << ExtraBits) - 1 ) * ExtraScale; // per instruction
243
+ unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0 ;
244
+ unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);
245
+
246
+ // We could handle this case when the copy instruction does not require an
247
+ // aligned immediate, but we do not currently do this.
248
+ assert (RangeAfterCopy % ExtraScale == 0 &&
249
+ " Extra instruction requires immediate to be aligned" );
250
+
251
+ unsigned RequiredExtraInstrs;
252
+ if (ExtraRange)
253
+ RequiredExtraInstrs = RoundUpToAlignment (RangeAfterCopy, ExtraRange) / ExtraRange;
254
+ else if (RangeAfterCopy > 0 )
255
+ // We need an extra instruction but none is available
256
+ RequiredExtraInstrs = 1000000 ;
257
+ else
258
+ RequiredExtraInstrs = 0 ;
259
+ unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
223
260
unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2 ;
224
- if (NumMIs > Threshold) {
225
- // This will expand into too many instructions. Load the immediate from a
226
- // constpool entry.
261
+
262
+ // Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
263
+ if (RequiredInstrs > Threshold) {
227
264
emitThumbRegPlusImmInReg (MBB, MBBI, dl,
228
265
DestReg, BaseReg, NumBytes, true ,
229
266
TII, MRI, MIFlags);
230
267
return ;
231
268
}
232
269
233
- if (DstNotEqBase) {
234
- if (isARMLowRegister (DestReg) && isARMLowRegister (BaseReg)) {
235
- // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
236
- unsigned Chunk = (1 << 3 ) - 1 ;
237
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
238
- Bytes -= ThisVal;
239
- const MCInstrDesc &MCID = TII.get (isSub ? ARM::tSUBi3 : ARM::tADDi3);
240
- const MachineInstrBuilder MIB =
241
- AddDefaultT1CC (BuildMI (MBB, MBBI, dl, MCID, DestReg)
242
- .setMIFlags (MIFlags));
243
- AddDefaultPred (MIB.addReg (BaseReg, RegState::Kill).addImm (ThisVal));
244
- } else if (isARMLowRegister (DestReg) && BaseReg == ARM::SP && Bytes > 0 ) {
245
- unsigned ThisVal = std::min (1020U , Bytes / 4 * 4 );
246
- Bytes -= ThisVal;
247
- AddDefaultPred (BuildMI (MBB, MBBI, dl, TII.get (ARM::tADDrSPi), DestReg)
248
- .addReg (BaseReg, RegState::Kill).addImm (ThisVal / 4 ))
249
- .setMIFlags (MIFlags);
250
- } else {
251
- AddDefaultPred (BuildMI (MBB, MBBI, dl, TII.get (ARM::tMOVr), DestReg)
252
- .addReg (BaseReg, RegState::Kill))
253
- .setMIFlags (MIFlags);
270
+ // Emit zero or one copy instructions
271
+ if (CopyOpc) {
272
+ unsigned CopyImm = std::min (Bytes, CopyRange) / CopyScale;
273
+ Bytes -= CopyImm * CopyScale;
274
+
275
+ MachineInstrBuilder MIB = BuildMI (MBB, MBBI, dl, TII.get (CopyOpc), DestReg);
276
+ if (CopyNeedsCC)
277
+ MIB = AddDefaultT1CC (MIB);
278
+ MIB.addReg (BaseReg, RegState::Kill);
279
+ if (CopyOpc != ARM::tMOVr) {
280
+ MIB.addImm (CopyImm);
254
281
}
282
+ AddDefaultPred (MIB.setMIFlags (MIFlags));
283
+
255
284
BaseReg = DestReg;
256
285
}
257
286
258
- unsigned Chunk = (( 1 << NumBits) - 1 ) * Scale;
287
+ // Emit zero or more in-place add/sub instructions
259
288
while (Bytes) {
260
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
261
- Bytes -= ThisVal;
262
- ThisVal /= Scale;
263
- // Build the new tADD / tSUB.
264
- if (isTwoAddr) {
265
- MachineInstrBuilder MIB = BuildMI (MBB, MBBI, dl, TII.get (Opc), DestReg);
266
- if (NeedCC)
267
- MIB = AddDefaultT1CC (MIB);
268
- MIB.addReg (DestReg).addImm (ThisVal);
269
- MIB = AddDefaultPred (MIB);
270
- MIB.setMIFlags (MIFlags);
271
- } else {
272
- bool isKill = BaseReg != ARM::SP;
273
- MachineInstrBuilder MIB = BuildMI (MBB, MBBI, dl, TII.get (Opc), DestReg);
274
- if (NeedCC)
275
- MIB = AddDefaultT1CC (MIB);
276
- MIB.addReg (BaseReg, getKillRegState (isKill)).addImm (ThisVal);
277
- MIB = AddDefaultPred (MIB);
278
- MIB.setMIFlags (MIFlags);
279
-
280
- BaseReg = DestReg;
281
- if (Opc == ARM::tADDrSPi) {
282
- // r4 = add sp, imm
283
- // r4 = add r4, imm
284
- // ...
285
- NumBits = 8 ;
286
- Scale = 1 ;
287
- Chunk = ((1 << NumBits) - 1 ) * Scale;
288
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
289
- NeedCC = isTwoAddr = true ;
290
- }
291
- }
292
- }
289
+ unsigned ExtraImm = std::min (Bytes, ExtraRange) / ExtraScale;
290
+ Bytes -= ExtraImm * ExtraScale;
293
291
294
- if ( ExtraOpc) {
295
- const MCInstrDesc &MCID = TII. get (ExtraOpc);
296
- AddDefaultPred ( AddDefaultT1CC ( BuildMI (MBB, MBBI, dl, MCID, DestReg))
297
- .addReg (DestReg, RegState::Kill)
298
- . addImm ((( unsigned )NumBytes) & 3 )
299
- .setMIFlags (MIFlags) );
292
+ MachineInstrBuilder MIB = BuildMI (MBB, MBBI, dl, TII. get ( ExtraOpc), DestReg);
293
+ if (ExtraNeedsCC)
294
+ MIB = AddDefaultT1CC (MIB);
295
+ MIB .addReg (BaseReg). addImm (ExtraImm);
296
+ MIB = AddDefaultPred (MIB);
297
+ MIB .setMIFlags (MIFlags);
300
298
}
301
299
}
302
300
0 commit comments