Skip to content

Commit 970b0d5

Browse files
committed
[Thumb1] Re-write emitThumbRegPlusImmediate
This was motivated by a bug which caused code like this to be miscompiled:

  declare void @take_ptr(i8*)
  define void @test() {
    %addr1.32 = alloca i8
    %addr2.32 = alloca i32, i32 1028
    call void @take_ptr(i8* %addr1)
    ret void
  }

This was emitting the following assembly to get the value of %addr1:

  add r0, sp, #1020
  add r0, r0, #8

However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this could not be assembled. The generated object file contained this, resulting in r0 holding SP+8 rather than SP+1028:

  add r0, sp, #1020
  add r0, sp, #8

This function looked like it could have caused miscompilations for other combinations of registers and offsets (though I don't think it is currently called with these), and the heuristic it used did not match the emitted code in all cases.

llvm-svn: 222125
1 parent 236b0ca commit 970b0d5

File tree

4 files changed

+212
-159
lines changed

4 files changed

+212
-159
lines changed

llvm/include/llvm/Support/MathExtras.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,10 +595,10 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
595595
/// RoundUpToAlignment(5, 8) = 8
596596
/// RoundUpToAlignment(17, 8) = 24
597597
/// RoundUpToAlignment(~0LL, 8) = 0
598+
/// RoundUpToAlignment(321, 255) = 510
598599
/// \endcode
599600
inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
600-
assert(isPowerOf2_64(Align) && "Alignment must be power of 2!");
601-
return (Value + Align - 1) & ~uint64_t(Align - 1);
601+
return (Value + Align - 1) / Align * Align;
602602
}
603603

604604
/// Returns the offset to the next integer (mod 2**64) that is greater than

llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp

Lines changed: 134 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
6666
int Val,
6767
ARMCC::CondCodes Pred, unsigned PredReg,
6868
unsigned MIFlags) const {
69+
assert((isARMLowRegister(DestReg) ||
70+
isVirtualRegister(DestReg)) &&
71+
"Thumb1 does not have ldr to high register");
72+
6973
MachineFunction &MF = *MBB.getParent();
7074
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7175
MachineConstantPool *ConstantPool = MF.getConstantPool();
@@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
106110
NumBytes = -NumBytes;
107111
}
108112
unsigned LdReg = DestReg;
109-
if (DestReg == ARM::SP) {
113+
if (DestReg == ARM::SP)
110114
assert(BaseReg == ARM::SP && "Unexpected!");
115+
if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
111116
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
112-
}
113117

114-
if (NumBytes <= 255 && NumBytes >= 0)
118+
if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
115119
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
116120
.addImm(NumBytes).setMIFlags(MIFlags);
117-
else if (NumBytes < 0 && NumBytes >= -255) {
121+
} else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
118122
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
119123
.addImm(NumBytes).setMIFlags(MIFlags);
120124
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
@@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
124128
ARMCC::AL, 0, MIFlags);
125129

126130
// Emit add / sub.
127-
int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
131+
int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
132+
: ARM::tADDrr);
128133
MachineInstrBuilder MIB =
129134
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
130135
if (Opc != ARM::tADDhirr)
@@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
136141
AddDefaultPred(MIB);
137142
}
138143

139-
/// calcNumMI - Returns the number of instructions required to materialize
140-
/// the specific add / sub r, c instruction.
141-
static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
142-
unsigned NumBits, unsigned Scale) {
143-
unsigned NumMIs = 0;
144-
unsigned Chunk = ((1 << NumBits) - 1) * Scale;
145-
146-
if (Opc == ARM::tADDrSPi) {
147-
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
148-
Bytes -= ThisVal;
149-
NumMIs++;
150-
NumBits = 8;
151-
Scale = 1; // Followed by a number of tADDi8.
152-
Chunk = ((1 << NumBits) - 1) * Scale;
153-
}
154-
155-
NumMIs += Bytes / Chunk;
156-
if ((Bytes % Chunk) != 0)
157-
NumMIs++;
158-
if (ExtraOpc)
159-
NumMIs++;
160-
return NumMIs;
161-
}
162-
163144
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
164-
/// a destreg = basereg + immediate in Thumb code.
145+
/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
146+
/// SUBs first, and uses a constant pool value if the instruction sequence would
147+
/// be too long. This is allowed to modify the condition flags.
165148
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
166149
MachineBasicBlock::iterator &MBBI,
167150
DebugLoc dl,
@@ -172,131 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
172155
bool isSub = NumBytes < 0;
173156
unsigned Bytes = (unsigned)NumBytes;
174157
if (isSub) Bytes = -NumBytes;
175-
bool isMul4 = (Bytes & 3) == 0;
176-
bool isTwoAddr = false;
177-
bool DstNotEqBase = false;
178-
unsigned NumBits = 1;
179-
unsigned Scale = 1;
180-
int Opc = 0;
158+
159+
int CopyOpc = 0;
160+
unsigned CopyBits = 0;
161+
unsigned CopyScale = 1;
162+
bool CopyNeedsCC = false;
181163
int ExtraOpc = 0;
182-
bool NeedCC = false;
183-
184-
if (DestReg == BaseReg && BaseReg == ARM::SP) {
185-
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
186-
NumBits = 7;
187-
Scale = 4;
188-
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
189-
isTwoAddr = true;
190-
} else if (!isSub && BaseReg == ARM::SP) {
191-
// r1 = add sp, 403
192-
// =>
193-
// r1 = add sp, 100 * 4
194-
// r1 = add r1, 3
195-
if (!isMul4) {
196-
Bytes &= ~3;
197-
ExtraOpc = ARM::tADDi3;
164+
unsigned ExtraBits = 0;
165+
unsigned ExtraScale = 1;
166+
bool ExtraNeedsCC = false;
167+
168+
// Strategy:
169+
// We need to select two types of instruction, maximizing the available
170+
// immediate range of each. The instructions we use will depend on whether
171+
// DestReg and BaseReg are low, high or the stack pointer.
172+
// * CopyOpc - DestReg = BaseReg + imm
173+
// This will be emitted once if DestReg != BaseReg, and never if
174+
// DestReg == BaseReg.
175+
// * ExtraOpc - DestReg = DestReg + imm
176+
// This will be emitted as many times as necessary to add the
177+
// full immediate.
178+
// If the immediate ranges of these instructions are not large enough to cover
179+
// NumBytes with a reasonable number of instructions, we fall back to using a
180+
// value loaded from a constant pool.
181+
if (DestReg == ARM::SP) {
182+
if (BaseReg == ARM::SP) {
183+
// sp -> sp
184+
// Already in right reg, no copy needed
185+
} else {
186+
// low -> sp or high -> sp
187+
CopyOpc = ARM::tMOVr;
188+
CopyBits = 0;
198189
}
199-
DstNotEqBase = true;
200-
NumBits = 8;
201-
Scale = 4;
202-
Opc = ARM::tADDrSPi;
203-
} else {
204-
// sp = sub sp, c
205-
// r1 = sub sp, c
206-
// r8 = sub sp, c
207-
if (DestReg != BaseReg)
208-
DstNotEqBase = true;
209-
if (DestReg == ARM::SP) {
210-
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
211-
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
212-
NumBits = 7;
213-
Scale = 4;
190+
ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
191+
ExtraBits = 7;
192+
ExtraScale = 4;
193+
} else if (isARMLowRegister(DestReg)) {
194+
if (BaseReg == ARM::SP) {
195+
// sp -> low
196+
assert(!isSub && "Thumb1 does not have tSUBrSPi");
197+
CopyOpc = ARM::tADDrSPi;
198+
CopyBits = 8;
199+
CopyScale = 4;
200+
} else if (DestReg == BaseReg) {
201+
// low -> same low
202+
// Already in right reg, no copy needed
203+
} else if (isARMLowRegister(BaseReg)) {
204+
// low -> different low
205+
CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
206+
CopyBits = 3;
207+
CopyNeedsCC = true;
208+
} else {
209+
// high -> low
210+
CopyOpc = ARM::tMOVr;
211+
CopyBits = 0;
212+
}
213+
ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
214+
ExtraBits = 8;
215+
ExtraNeedsCC = true;
216+
} else /* DestReg is high */ {
217+
if (DestReg == BaseReg) {
218+
// high -> same high
219+
// Already in right reg, no copy needed
214220
} else {
215-
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
216-
NumBits = 8;
217-
NeedCC = true;
221+
// {low,high,sp} -> high
222+
CopyOpc = ARM::tMOVr;
223+
CopyBits = 0;
218224
}
219-
isTwoAddr = true;
225+
ExtraOpc = 0;
220226
}
221227

222-
unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
228+
// We could handle an unaligned immediate with an unaligned copy instruction
229+
// and an aligned extra instruction, but this case is not currently needed.
230+
assert(((Bytes & 3) == 0 || ExtraScale == 1) &&
231+
"Unaligned offset, but all instructions require alignment");
232+
233+
unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale;
234+
// If we would emit the copy with an immediate of 0, just use tMOVr.
235+
if (CopyOpc && Bytes < CopyScale) {
236+
CopyOpc = ARM::tMOVr;
237+
CopyBits = 0;
238+
CopyScale = 1;
239+
CopyNeedsCC = false;
240+
CopyRange = 0;
241+
}
242+
unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction
243+
unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0;
244+
unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);
245+
246+
// We could handle this case when the copy instruction does not require an
247+
// aligned immediate, but we do not currently do this.
248+
assert(RangeAfterCopy % ExtraScale == 0 &&
249+
"Extra instruction requires immediate to be aligned");
250+
251+
unsigned RequiredExtraInstrs;
252+
if (ExtraRange)
253+
RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
254+
else if (RangeAfterCopy > 0)
255+
// We need an extra instruction but none is available
256+
RequiredExtraInstrs = 1000000;
257+
else
258+
RequiredExtraInstrs = 0;
259+
unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
223260
unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
224-
if (NumMIs > Threshold) {
225-
// This will expand into too many instructions. Load the immediate from a
226-
// constpool entry.
261+
262+
// Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
263+
if (RequiredInstrs > Threshold) {
227264
emitThumbRegPlusImmInReg(MBB, MBBI, dl,
228265
DestReg, BaseReg, NumBytes, true,
229266
TII, MRI, MIFlags);
230267
return;
231268
}
232269

233-
if (DstNotEqBase) {
234-
if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
235-
// If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
236-
unsigned Chunk = (1 << 3) - 1;
237-
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
238-
Bytes -= ThisVal;
239-
const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
240-
const MachineInstrBuilder MIB =
241-
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
242-
.setMIFlags(MIFlags));
243-
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
244-
} else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) {
245-
unsigned ThisVal = std::min(1020U, Bytes / 4 * 4);
246-
Bytes -= ThisVal;
247-
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg)
248-
.addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4))
249-
.setMIFlags(MIFlags);
250-
} else {
251-
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
252-
.addReg(BaseReg, RegState::Kill))
253-
.setMIFlags(MIFlags);
270+
// Emit zero or one copy instructions
271+
if (CopyOpc) {
272+
unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale;
273+
Bytes -= CopyImm * CopyScale;
274+
275+
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
276+
if (CopyNeedsCC)
277+
MIB = AddDefaultT1CC(MIB);
278+
MIB.addReg(BaseReg, RegState::Kill);
279+
if (CopyOpc != ARM::tMOVr) {
280+
MIB.addImm(CopyImm);
254281
}
282+
AddDefaultPred(MIB.setMIFlags(MIFlags));
283+
255284
BaseReg = DestReg;
256285
}
257286

258-
unsigned Chunk = ((1 << NumBits) - 1) * Scale;
287+
// Emit zero or more in-place add/sub instructions
259288
while (Bytes) {
260-
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
261-
Bytes -= ThisVal;
262-
ThisVal /= Scale;
263-
// Build the new tADD / tSUB.
264-
if (isTwoAddr) {
265-
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
266-
if (NeedCC)
267-
MIB = AddDefaultT1CC(MIB);
268-
MIB.addReg(DestReg).addImm(ThisVal);
269-
MIB = AddDefaultPred(MIB);
270-
MIB.setMIFlags(MIFlags);
271-
} else {
272-
bool isKill = BaseReg != ARM::SP;
273-
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
274-
if (NeedCC)
275-
MIB = AddDefaultT1CC(MIB);
276-
MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
277-
MIB = AddDefaultPred(MIB);
278-
MIB.setMIFlags(MIFlags);
279-
280-
BaseReg = DestReg;
281-
if (Opc == ARM::tADDrSPi) {
282-
// r4 = add sp, imm
283-
// r4 = add r4, imm
284-
// ...
285-
NumBits = 8;
286-
Scale = 1;
287-
Chunk = ((1 << NumBits) - 1) * Scale;
288-
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
289-
NeedCC = isTwoAddr = true;
290-
}
291-
}
292-
}
289+
unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale;
290+
Bytes -= ExtraImm * ExtraScale;
293291

294-
if (ExtraOpc) {
295-
const MCInstrDesc &MCID = TII.get(ExtraOpc);
296-
AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
297-
.addReg(DestReg, RegState::Kill)
298-
.addImm(((unsigned)NumBytes) & 3)
299-
.setMIFlags(MIFlags));
292+
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
293+
if (ExtraNeedsCC)
294+
MIB = AddDefaultT1CC(MIB);
295+
MIB.addReg(BaseReg).addImm(ExtraImm);
296+
MIB = AddDefaultPred(MIB);
297+
MIB.setMIFlags(MIFlags);
300298
}
301299
}
302300

0 commit comments

Comments
 (0)