Skip to content

Commit 3be3b33

Browse files
authored
[SandboxVec][BottomUpVec] Implement pack of scalars (#115549)
This patch implements packing of scalar operands when the vectorizer decides to stop vectorizing. Packing is implemented with a sequence of InsertElement instructions. Packing vectors requires different instructions, so it will be implemented in a follow-up patch.
1 parent 012fad9 commit 3be3b33

File tree

5 files changed

+145
-7
lines changed

5 files changed

+145
-7
lines changed

llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ class BottomUpVec final : public FunctionPass {
3131
/// \p Bndl. \p Operands are the already vectorized operands.
3232
Value *createVectorInstr(ArrayRef<Value *> Bndl, ArrayRef<Value *> Operands);
3333
void tryEraseDeadInstrs();
34-
Value *vectorizeRec(ArrayRef<Value *> Bndl);
34+
Value *createPack(ArrayRef<Value *> ToPack);
35+
Value *vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth);
3536
bool tryVectorize(ArrayRef<Value *> Seeds);
3637

3738
// The PM containing the pipeline of region passes.

llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,31 @@ class VecUtils {
108108
}
109109
return LowestI;
110110
}
111+
/// If all values in \p Bndl are of the same scalar type then return it,
112+
/// otherwise return nullptr.
113+
static Type *tryGetCommonScalarType(ArrayRef<Value *> Bndl) {
114+
Value *V0 = Bndl[0];
115+
Type *Ty0 = Utils::getExpectedType(V0);
116+
Type *ScalarTy = VecUtils::getElementType(Ty0);
117+
for (auto *V : drop_begin(Bndl)) {
118+
Type *NTy = Utils::getExpectedType(V);
119+
Type *NScalarTy = VecUtils::getElementType(NTy);
120+
if (NScalarTy != ScalarTy)
121+
return nullptr;
122+
}
123+
return ScalarTy;
124+
}
125+
126+
/// Similar to tryGetCommonScalarType() but will assert that there is a common
127+
/// type. So this is faster in release builds as it won't iterate through the
128+
/// values.
129+
static Type *getCommonScalarType(ArrayRef<Value *> Bndl) {
130+
Value *V0 = Bndl[0];
131+
Type *Ty0 = Utils::getExpectedType(V0);
132+
Type *ScalarTy = VecUtils::getElementType(Ty0);
133+
assert(tryGetCommonScalarType(Bndl) && "Expected common scalar type!");
134+
return ScalarTy;
135+
}
111136
};
112137

113138
} // namespace llvm::sandboxir

llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,39 @@ void BottomUpVec::tryEraseDeadInstrs() {
164164
DeadInstrCandidates.clear();
165165
}
166166

167-
Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
167+
/// Packs the scalars in \p ToPack into a single vector value by emitting one
/// InsertElement per element, starting from a poison vector and filling the
/// lanes in bundle order. Returns the last value produced by the chain.
/// Vector elements are not supported yet and hit llvm_unreachable.
Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack) {
  BasicBlock::iterator InsertPt = getInsertPointAfterInstrs(ToPack);

  Type *ElemTy = VecUtils::getCommonScalarType(ToPack);
  unsigned NumLanes = VecUtils::getNumLanes(ToPack);
  Type *PackTy = VecUtils::getWideType(ElemTy, NumLanes);

  // The chain of inserts starts from poison and fills one lane per element.
  Value *Packed = PoisonValue::get(PackTy);
  Context &Ctx = ToPack[0]->getContext();

  unsigned Lane = 0;
  for (Value *Elm : ToPack) {
    // Scalars and vectors need different IR; only scalars are handled here.
    if (Elm->getType()->isVectorTy())
      llvm_unreachable("Unimplemented");

    Constant *LaneC = ConstantInt::getSigned(Type::getInt32Ty(Ctx), Lane);
    ++Lane;
    // The insert may be folded into a Constant when the vector operand is a
    // Constant; in that case only the most recent constant is kept.
    Packed =
        InsertElementInst::create(Packed, Elm, LaneC, InsertPt, Ctx, "Pack");
    // When a real instruction was created, emit the next insert right after it.
    if (auto *InsertI = dyn_cast<Instruction>(Packed))
      InsertPt = std::next(InsertI->getIterator());
  }
  return Packed;
}
198+
199+
Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth) {
168200
Value *NewVec = nullptr;
169201
const auto &LegalityRes = Legality->canVectorize(Bndl);
170202
switch (LegalityRes.getSubclassID()) {
@@ -178,15 +210,15 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
178210
break;
179211
case Instruction::Opcode::Store: {
180212
// Don't recurse towards the pointer operand.
181-
auto *VecOp = vectorizeRec(getOperand(Bndl, 0));
213+
auto *VecOp = vectorizeRec(getOperand(Bndl, 0), Depth + 1);
182214
VecOperands.push_back(VecOp);
183215
VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand());
184216
break;
185217
}
186218
default:
187219
// Visit all operands.
188220
for (auto OpIdx : seq<unsigned>(I->getNumOperands())) {
189-
auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx));
221+
auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx), Depth + 1);
190222
VecOperands.push_back(VecOp);
191223
}
192224
break;
@@ -201,16 +233,19 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
201233
break;
202234
}
203235
case LegalityResultID::Pack: {
204-
// TODO: Unimplemented
205-
llvm_unreachable("Unimplemented");
236+
// If we can't vectorize the seeds then just return.
237+
if (Depth == 0)
238+
return nullptr;
239+
NewVec = createPack(Bndl);
240+
break;
206241
}
207242
}
208243
return NewVec;
209244
}
210245

211246
// Entry point for one seed bundle: clears DeadInstrCandidates, runs
// vectorizeRec() on \p Bndl, calls tryEraseDeadInstrs() and returns Change.
// In this diff view the "-" line is the pre-patch call and the "+" line is its
// replacement, which starts the recursion at Depth 0.
bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
212247
DeadInstrCandidates.clear();
213-
vectorizeRec(Bndl);
248+
vectorizeRec(Bndl, /*Depth=*/0);
214249
tryEraseDeadInstrs();
215250
return Change;
216251
}

llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,47 @@ define float @scalars_with_external_uses_not_dead(ptr %ptr) {
143143
ret float %ld0
144144
}
145145

146+
; The two scalar stores to %ptr0/%ptr1 are expected to become one <2 x float>
; store. Their operands are loaded from %ptr0 and %ptr2, and the expected
; output keeps both scalar loads and packs them with two insertelement
; instructions.
define void @pack_scalars(ptr %ptr, ptr %ptr2) {
147+
; CHECK-LABEL: define void @pack_scalars(
148+
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
149+
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
150+
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
151+
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
152+
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
153+
; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LD0]], i32 0
154+
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LD1]], i32 1
155+
; CHECK-NEXT: store <2 x float> [[PACK1]], ptr [[PTR0]], align 4
156+
; CHECK-NEXT: ret void
157+
;
158+
%ptr0 = getelementptr float, ptr %ptr, i32 0
159+
%ptr1 = getelementptr float, ptr %ptr, i32 1
160+
%ld0 = load float, ptr %ptr0
161+
%ld1 = load float, ptr %ptr2
162+
store float %ld0, ptr %ptr0
163+
store float %ld1, ptr %ptr1
164+
ret void
165+
}
166+
167+
declare void @foo()
168+
; The two stores are separated by a call to @foo, which (per the inline note
; on the call) blocks scheduling of the store seeds. The expected output is
; the input function unchanged, i.e. nothing is vectorized or packed.
define void @cant_vectorize_seeds(ptr %ptr) {
169+
; CHECK-LABEL: define void @cant_vectorize_seeds(
170+
; CHECK-SAME: ptr [[PTR:%.*]]) {
171+
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
172+
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
173+
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
174+
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
175+
; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4
176+
; CHECK-NEXT: call void @foo()
177+
; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4
178+
; CHECK-NEXT: ret void
179+
;
180+
%ptr0 = getelementptr float, ptr %ptr, i32 0
181+
%ptr1 = getelementptr float, ptr %ptr, i32 1
182+
%ld0 = load float, ptr %ptr0
183+
%ld1 = load float, ptr %ptr1
184+
store float %ld1, ptr %ptr1
185+
call void @foo() ; This call blocks scheduling of the store seeds.
186+
store float %ld1, ptr %ptr1
187+
ret void
188+
}
189+

llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,36 @@ define void @foo(i8 %v) {
439439
SmallVector<sandboxir::Instruction *> CBA({IC, IB, IA});
440440
EXPECT_EQ(sandboxir::VecUtils::getLowest(CBA), IC);
441441
}
442+
443+
// Exercises VecUtils::tryGetCommonScalarType() and getCommonScalarType() on
// one bundle that has a common scalar type and on one that does not.
TEST_F(VecUtilsTest, GetCommonScalarType) {
444+
parseIR(R"IR(
445+
define void @foo(i8 %v, ptr %ptr) {
446+
bb0:
447+
%add0 = add i8 %v, %v
448+
store i8 %v, ptr %ptr
449+
ret void
450+
}
451+
)IR");
452+
Function &LLVMF = *M->getFunction("foo");
453+
454+
sandboxir::Context Ctx(C);
455+
auto &F = *Ctx.createFunction(&LLVMF);
456+
auto &BB = *F.begin();
457+
// Walk the block in order: the add, then the store, then the ret.
auto It = BB.begin();
458+
auto *Add0 = cast<sandboxir::BinaryOperator>(&*It++);
459+
auto *Store = cast<sandboxir::StoreInst>(&*It++);
460+
auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
461+
// Add + store: both queries are expected to return the add's type.
{
462+
SmallVector<sandboxir::Value *> Vec = {Add0, Store};
463+
EXPECT_EQ(sandboxir::VecUtils::tryGetCommonScalarType(Vec),
464+
Add0->getType());
465+
EXPECT_EQ(sandboxir::VecUtils::getCommonScalarType(Vec), Add0->getType());
466+
}
467+
// Add + ret: the try- variant is expected to return nullptr, and the
// asserting variant is expected to die when NDEBUG is not defined.
{
468+
SmallVector<sandboxir::Value *> Vec = {Add0, Ret};
469+
EXPECT_EQ(sandboxir::VecUtils::tryGetCommonScalarType(Vec), nullptr);
470+
#ifndef NDEBUG
471+
EXPECT_DEATH(sandboxir::VecUtils::getCommonScalarType(Vec), ".*common.*");
472+
#endif // NDEBUG
473+
}
474+
}

0 commit comments

Comments
 (0)