Skip to content

Commit 62df6fe

Browse files
mshelego authored and igcbot committed
GenXDeadVectorRemoval predicate fix
Do not modify "all true" predicates, even if part of the vector is dead. Modifying them can result in the creation of unnecessary predicated instructions.
1 parent e166627 commit 62df6fe

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXDeadVectorRemoval.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,11 @@ Constant *GenXDeadVectorRemoval::trySimplify(Constant *C,
151151
if (!LiveElems.isAnyDead())
152152
return C;
153153

154+
// Do not modify "all true" predicates to avoid creation of redundant
155+
// execution masks
156+
if (C->getType()->getScalarType()->isIntegerTy(1) && C->isAllOnesValue())
157+
return C;
158+
154159
if (auto CA = dyn_cast<ConstantAggregate>(C))
155160
return trySimplify(CA, LiveElems);
156161

IGC/VectorCompiler/test/DeadVectorRemoval/const_wrregion.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,13 @@ define <16 x i8> @test2(<16 x i8> %val) {
2727
%.sum = add <16 x i8> %.region1, %.region2
2828
ret <16 x i8> %.sum
2929
}
30+
31+
declare <4 x i32> @llvm.genx.wrregioni.v4i32.v4i32.i16.v4i1(<4 x i32>, <4 x i32>, i32, i32, i32, i16, i32, <4 x i1>) readnone nounwind
32+
declare i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32>, i32, i32, i32, i16, i32) readnone nounwind
33+
34+
; test3: a wrregion call whose first operand is a constant vector and whose
; predicate operand is all true; only a single i32 of its result is read back
; through rdregion. The CHECK line expects the constant first operand to be
; replaced with undef while the <4 x i1> predicate stays
; <i1 true, i1 true, i1 true, i1 true>.
define i32 @test3(<4 x i32> %val) {
35+
; CHECK: %1 = call <4 x i32> @llvm.genx.wrregioni.v4i32.v4i32.i16.v4i1(<4 x i32> undef, <4 x i32> %val, i32 0, i32 4, i32 1, i16 0, i32 undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
36+
%1 = call <4 x i32> @llvm.genx.wrregioni.v4i32.v4i32.i16.v4i1(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> %val, i32 0, i32 4, i32 1, i16 0, i32 undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
37+
%.region = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %1, i32 0, i32 1, i32 0, i16 4, i32 undef)
38+
ret i32 %.region
39+
}

0 commit comments

Comments
 (0)