Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 173c7d4

Browse files
committed
GlobalISel: introduce G_PTR_MASK to simplify alloca handling.
This instruction clears the low bits of a pointer without requiring conversions to and from an integer (which are possibly dodgy if pointers aren't ints). Since (as far as I'm aware) all masks are statically known, the instruction takes an immediate operand rather than a register to specify the mask.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295103 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent dca651f commit 173c7d4

File tree

6 files changed

+69
-56
lines changed

6 files changed

+69
-56
lines changed

include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,22 @@ class MachineIRBuilder {
229229
MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0,
230230
unsigned Op1);
231231

232+
/// Build and insert \p Res<def> = G_PTR_MASK \p Op0, \p NumBits
233+
///
234+
/// G_PTR_MASK clears the low bits of a pointer operand without destroying its
235+
/// pointer properties. This has the effect of rounding the address *down* to
236+
/// an alignment of 2^NumBits (NumBits is the log2 of that alignment).
237+
///
238+
/// \pre setBasicBlock or setMI must have been called.
239+
/// \pre \p Res and \p Op0 must be generic virtual registers with pointer
240+
/// type.
241+
/// \pre \p NumBits must be an integer representing the number of low bits to
242+
/// be cleared in \p Op0.
243+
///
244+
/// \return a MachineInstrBuilder for the newly created instruction.
245+
MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0,
246+
uint32_t NumBits);
247+
232248
/// Build and insert \p Res<def>, \p CarryOut<def> = G_UADDE \p Op0,
233249
/// \p Op1, \p CarryIn
234250
///

include/llvm/Target/GenericOpcodes.td

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,6 @@ def G_ADD : Instruction {
110110
let isCommutable = 1;
111111
}
112112

113-
// Generic pointer offset.
114-
def G_GEP : Instruction {
115-
let OutOperandList = (outs type0:$dst);
116-
let InOperandList = (ins type0:$src1, type1:$src2);
117-
let hasSideEffects = 0;
118-
}
119-
120113
// Generic subtraction.
121114
def G_SUB : Instruction {
122115
let OutOperandList = (outs type0:$dst);
@@ -231,6 +224,19 @@ def G_SELECT : Instruction {
231224
let hasSideEffects = 0;
232225
}
233226

227+
// Generic pointer offset.
228+
def G_GEP : Instruction {
229+
let OutOperandList = (outs type0:$dst);
230+
let InOperandList = (ins type0:$src1, type1:$src2);
231+
let hasSideEffects = 0;
232+
}
233+
234+
def G_PTR_MASK : Instruction {
235+
let OutOperandList = (outs type0:$dst);
236+
let InOperandList = (ins type0:$src, unknown:$bits);
237+
let hasSideEffects = 0;
238+
}
239+
234240
//------------------------------------------------------------------------------
235241
// Overflow ops
236242
//------------------------------------------------------------------------------

include/llvm/Target/TargetOpcodes.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,10 @@ HANDLE_TARGET_OPCODE(G_UITOFP)
378378
/// Generic pointer offset
379379
HANDLE_TARGET_OPCODE(G_GEP)
380380

381+
/// Clear the specified number of low bits in a pointer. This rounds the value
382+
/// *down* to the given alignment.
383+
HANDLE_TARGET_OPCODE(G_PTR_MASK)
384+
381385
/// Generic BRANCH instruction. This is an unconditional branch.
382386
HANDLE_TARGET_OPCODE(G_BR)
383387

lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,7 @@ bool IRTranslator::translateAlloca(const User &U,
870870

871871
unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
872872
unsigned TySize = MRI->createGenericVirtualRegister(IntPtrTy);
873-
MIRBuilder.buildConstant(TySize, DL->getTypeAllocSize(Ty));
873+
MIRBuilder.buildConstant(TySize, -DL->getTypeAllocSize(Ty));
874874
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
875875

876876
LLT PtrTy = LLT{*AI.getType(), *DL};
@@ -880,11 +880,8 @@ bool IRTranslator::translateAlloca(const User &U,
880880
unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
881881
MIRBuilder.buildCopy(SPTmp, SPReg);
882882

883-
unsigned SPInt = MRI->createGenericVirtualRegister(IntPtrTy);
884-
MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(SPInt).addUse(SPTmp);
885-
886-
unsigned AllocInt = MRI->createGenericVirtualRegister(IntPtrTy);
887-
MIRBuilder.buildSub(AllocInt, SPInt, AllocSize);
883+
unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
884+
MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
888885

889886
// Handle alignment. We have to realign if the allocation granule was smaller
890887
// than stack alignment, or the specific alloca requires more than stack
@@ -896,25 +893,13 @@ bool IRTranslator::translateAlloca(const User &U,
896893
// Round the size of the allocation up to the stack alignment size
897894
// by adding SA-1 to the size. This doesn't overflow because we're computing
898895
// an address inside an alloca.
899-
unsigned TmpSize = MRI->createGenericVirtualRegister(IntPtrTy);
900-
unsigned AlignMinus1 = MRI->createGenericVirtualRegister(IntPtrTy);
901-
MIRBuilder.buildConstant(AlignMinus1, Align - 1);
902-
MIRBuilder.buildSub(TmpSize, AllocInt, AlignMinus1);
903-
904-
unsigned AlignedAlloc = MRI->createGenericVirtualRegister(IntPtrTy);
905-
unsigned AlignMask = MRI->createGenericVirtualRegister(IntPtrTy);
906-
MIRBuilder.buildConstant(AlignMask, -(uint64_t)Align);
907-
MIRBuilder.buildAnd(AlignedAlloc, TmpSize, AlignMask);
908-
909-
AllocInt = AlignedAlloc;
896+
unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
897+
MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
898+
AllocTmp = AlignedAlloc;
910899
}
911900

912-
unsigned DstReg = getOrCreateVReg(AI);
913-
MIRBuilder.buildInstr(TargetOpcode::G_INTTOPTR)
914-
.addDef(DstReg)
915-
.addUse(AllocInt);
916-
917-
MIRBuilder.buildCopy(SPReg, DstReg);
901+
MIRBuilder.buildCopy(SPReg, AllocTmp);
902+
MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
918903

919904
MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
920905
assert(MF->getFrameInfo().hasVarSizedObjects());

lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,17 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
187187
.addUse(Op1);
188188
}
189189

190+
MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
191+
uint32_t NumBits) {
192+
assert(MRI->getType(Res).isPointer() &&
193+
MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
194+
195+
return buildInstr(TargetOpcode::G_PTR_MASK)
196+
.addDef(Res)
197+
.addUse(Op0)
198+
.addImm(NumBits);
199+
}
200+
190201
MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
191202
unsigned Op1) {
192203
assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&

test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,14 @@
33
; CHECK-LABEL: name: test_simple_alloca
44
; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0
55
; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32)
6-
; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 1
6+
; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -1
77
; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]]
88
; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp
9-
; CHECK: [[SP_INT:%[0-9]+]](s64) = G_PTRTOINT [[SP_TMP]](p0)
10-
; CHECK: [[ALLOC:%[0-9]+]](s64) = G_SUB [[SP_INT]], [[NUMBYTES]]
11-
; CHECK: [[ALIGN_M_1:%[0-9]+]](s64) = G_CONSTANT i64 15
12-
; CHECK: [[ALIGN_TMP:%[0-9]+]](s64) = G_SUB [[ALLOC]], [[ALIGN_M_1]]
13-
; CHECK: [[ALIGN_MASK:%[0-9]+]](s64) = G_CONSTANT i64 -16
14-
; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](s64) = G_AND [[ALIGN_TMP]], [[ALIGN_MASK]]
15-
; CHECK: [[ALLOC_PTR:%[0-9]+]](p0) = G_INTTOPTR [[ALIGNED_ALLOC]](s64)
16-
; CHECK: %sp = COPY [[ALLOC_PTR]]
17-
; CHECK: %x0 = COPY [[ALLOC_PTR]]
9+
; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]]
10+
; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](p0) = G_PTR_MASK [[ALLOC]], 4
11+
; CHECK: %sp = COPY [[ALIGNED_ALLOC]]
12+
; CHECK: [[ALLOC:%[0-9]+]](p0) = COPY [[ALIGNED_ALLOC]]
13+
; CHECK: %x0 = COPY [[ALLOC]]
1814
define i8* @test_simple_alloca(i32 %numelts) {
1915
%addr = alloca i8, i32 %numelts
2016
ret i8* %addr
@@ -23,18 +19,14 @@ define i8* @test_simple_alloca(i32 %numelts) {
2319
; CHECK-LABEL: name: test_aligned_alloca
2420
; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0
2521
; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32)
26-
; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 1
22+
; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -1
2723
; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]]
2824
; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp
29-
; CHECK: [[SP_INT:%[0-9]+]](s64) = G_PTRTOINT [[SP_TMP]](p0)
30-
; CHECK: [[ALLOC:%[0-9]+]](s64) = G_SUB [[SP_INT]], [[NUMBYTES]]
31-
; CHECK: [[ALIGN_M_1:%[0-9]+]](s64) = G_CONSTANT i64 31
32-
; CHECK: [[ALIGN_TMP:%[0-9]+]](s64) = G_SUB [[ALLOC]], [[ALIGN_M_1]]
33-
; CHECK: [[ALIGN_MASK:%[0-9]+]](s64) = G_CONSTANT i64 -32
34-
; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](s64) = G_AND [[ALIGN_TMP]], [[ALIGN_MASK]]
35-
; CHECK: [[ALLOC_PTR:%[0-9]+]](p0) = G_INTTOPTR [[ALIGNED_ALLOC]](s64)
36-
; CHECK: %sp = COPY [[ALLOC_PTR]]
37-
; CHECK: %x0 = COPY [[ALLOC_PTR]]
25+
; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]]
26+
; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](p0) = G_PTR_MASK [[ALLOC]], 5
27+
; CHECK: %sp = COPY [[ALIGNED_ALLOC]]
28+
; CHECK: [[ALLOC:%[0-9]+]](p0) = COPY [[ALIGNED_ALLOC]]
29+
; CHECK: %x0 = COPY [[ALLOC]]
3830
define i8* @test_aligned_alloca(i32 %numelts) {
3931
%addr = alloca i8, i32 %numelts, align 32
4032
ret i8* %addr
@@ -43,14 +35,13 @@ define i8* @test_aligned_alloca(i32 %numelts) {
4335
; CHECK-LABEL: name: test_natural_alloca
4436
; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0
4537
; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32)
46-
; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 16
38+
; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -16
4739
; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]]
4840
; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp
49-
; CHECK: [[SP_INT:%[0-9]+]](s64) = G_PTRTOINT [[SP_TMP]](p0)
50-
; CHECK: [[ALLOC:%[0-9]+]](s64) = G_SUB [[SP_INT]], [[NUMBYTES]]
51-
; CHECK: [[ALLOC_PTR:%[0-9]+]](p0) = G_INTTOPTR [[ALLOC]](s64)
52-
; CHECK: %sp = COPY [[ALLOC_PTR]]
53-
; CHECK: %x0 = COPY [[ALLOC_PTR]]
41+
; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]]
42+
; CHECK: %sp = COPY [[ALLOC]]
43+
; CHECK: [[ALLOC_TMP:%[0-9]+]](p0) = COPY [[ALLOC]]
44+
; CHECK: %x0 = COPY [[ALLOC_TMP]]
5445
define i128* @test_natural_alloca(i32 %numelts) {
5546
%addr = alloca i128, i32 %numelts
5647
ret i128* %addr

0 commit comments

Comments
 (0)